diff --git a/crawler.py b/crawler.py index 2d5c2bb..419ac6c 100644 --- a/crawler.py +++ b/crawler.py @@ -345,10 +345,16 @@ def parser_beautiful(innerHTML, article, number, dircrea, bk=False): if 'class' in chi.attrs.keys(): classc = chi.attrs["class"] if datatex and classc and 'ztext-math' in classc: - if article[-3-1:]=='
' or article[-1:]=='\n': - article += "\n$" + chi.attrs["data-tex"] + "$" - else: - article += "$" + chi.attrs["data-tex"] + "$" + content = chi.attrs["data-tex"] + while len(content) > 0 and ' '==content[0]: + content = content[1:] + while len(content) > 0 and ' '==content[-1]: + content = content[:-1] + if len(content) > 0: + if article[-3-1:]=='
' or article[-1:]=='\n': + article += "\n$" + content + "$" + else: + article += "$" + content + "$" else: article, number = parser_beautiful(chi, article, number, dircrea, bk) # article += nod.text