21 lines
797 B
Plaintext
21 lines
797 B
Plaintext
|
|
|||
|
def start3(self):
    """Collect (title, href) pairs for every article in the blog category.

    Walks the paginated category listing starting at page 1 and appends one
    ``(article_title, article_href)`` tuple to ``self.TaskQueue`` for each
    ``div.column_article_title`` entry found. Iteration stops at the first
    page that contains no such entries.

    Reads ``self.headers`` (sent with every request) and mutates
    ``self.TaskQueue`` in place; returns ``None``.
    """
    # NOTE: the blog host and category id are hard-coded. A per-user listing
    # (https://<username>.blog.csdn.net/article/list/<n>) was used previously.
    # One scraper for the whole crawl: it is loop-invariant, and reusing it
    # keeps the session (and any cookies it obtained) across page requests.
    scraper = cloudscraper.create_scraper()

    page = 0
    while True:
        page += 1
        url = u'https://xuesong.blog.csdn.net/category_8454447_' + str(page) + '.html'
        print(url)

        response = scraper.get(url, headers=self.headers)
        soup = BeautifulSoup(response.text, "html.parser")
        articles = soup.find_all('div', attrs={"class": "column_article_title"})
        if not articles:
            # An empty listing marks the end of the pagination.
            break

        for article in articles:
            link = article.a
            if link is None:
                # Title block without an anchor: nothing to queue.
                continue
            article_href = link.get('href')
            if article_href is None:
                continue
            article_title = link.text.strip().replace(' ', ':')
            self.TaskQueue.append((article_title, article_href))
|
|||
|
|