paragraph crawl

master
ZouJiu 2023-07-28 20:33:12 +08:00
parent 1919725648
commit cd65a4ec59
1 changed file with 6 additions and 4 deletions

View File

@@ -383,10 +383,12 @@ def recursion(nod, article, number, driver, dircrea, bk=False):
for pnode in p_childNodes:
article, number = recursion(pnode, article, number, driver, dircrea, bk)
elif tag_name=='p':
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
for pnode in p_childNodes:
article, number = recursion(pnode, article, number, driver, dircrea, bk)
article += "\n"
try:
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
for pnode in p_childNodes:
article, number = recursion(pnode, article, number, driver, dircrea, bk)
except:
article += nod.text
elif tag_name=="div":
# atags = nod.find_elements(By.TAG_NAME, 'a')
prenode = nod.find_elements(By.TAG_NAME, 'code')