paragraph crawl
parent
1919725648
commit
cd65a4ec59
|
@ -383,10 +383,12 @@ def recursion(nod, article, number, driver, dircrea, bk=False):
|
||||||
for pnode in p_childNodes:
|
for pnode in p_childNodes:
|
||||||
article, number = recursion(pnode, article, number, driver, dircrea, bk)
|
article, number = recursion(pnode, article, number, driver, dircrea, bk)
|
||||||
elif tag_name=='p':
|
elif tag_name=='p':
|
||||||
|
try:
|
||||||
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
|
p_childNodes = driver.execute_script("return arguments[0].childNodes;", nod)
|
||||||
for pnode in p_childNodes:
|
for pnode in p_childNodes:
|
||||||
article, number = recursion(pnode, article, number, driver, dircrea, bk)
|
article, number = recursion(pnode, article, number, driver, dircrea, bk)
|
||||||
article += "\n"
|
except:
|
||||||
|
article += nod.text
|
||||||
elif tag_name=="div":
|
elif tag_name=="div":
|
||||||
# atags = nod.find_elements(By.TAG_NAME, 'a')
|
# atags = nod.find_elements(By.TAG_NAME, 'a')
|
||||||
prenode = nod.find_elements(By.TAG_NAME, 'code')
|
prenode = nod.find_elements(By.TAG_NAME, 'code')
|
||||||
|
|
Loading…
Reference in New Issue