paragraph crawl

master
ZouJiu 2023-07-28 20:33:12 +08:00
parent 1919725648
commit cd65a4ec59
1 changed file with 6 additions and 4 deletions

View File

@@ -383,10 +383,12 @@ def recursion(nod, article, number, driver, dircrea, bk=False):
for pnode in p_childNodes: for pnode in p_childNodes:
article, number = recursion(pnode, article, number, driver, dircrea, bk) article, number = recursion(pnode, article, number, driver, dircrea, bk)
elif tag_name=='p': elif tag_name=='p':
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod) try:
for pnode in p_childNodes: p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
article, number = recursion(pnode, article, number, driver, dircrea, bk) for pnode in p_childNodes:
article += "\n" article, number = recursion(pnode, article, number, driver, dircrea, bk)
except:
article += nod.text
elif tag_name=="div": elif tag_name=="div":
# atags = nod.find_elements(By.TAG_NAME, 'a') # atags = nod.find_elements(By.TAG_NAME, 'a')
prenode = nod.find_elements(By.TAG_NAME, 'code') prenode = nod.find_elements(By.TAG_NAME, 'code')