# Assumes helpers defined earlier in the post: fetch(), save_article(),
# the Proxy model, and the SEARCH_URL / SEARCH_TEXT constants.
from bs4 import BeautifulSoup
from gevent import GreenletExit, sleep
from mongoengine import DoesNotExist
from requests.exceptions import ConnectionError, Timeout


def save_search_result(page, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, page)
    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            # Too many failures: hand the URL back to the queue for
            # another worker and end this greenlet.
            queue.put(url)
            raise GreenletExit()
        try:
            # The proxy misbehaved, so drop it from the pool.
            dead_proxy = Proxy.objects.get(address=proxy)
            dead_proxy.delete()
        except DoesNotExist:
            pass
        return save_search_result(page, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # No result list in the page (probably blocked); back off and retry.
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        return save_search_result(page, queue, retry)
    articles = results.find_all(
        'div', class_=lambda cls: cls and 'wx-rb' in cls)
    for article in articles:
        save_article(article)
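
The queue argument and the GreenletExit above imply a pool of workers driving this function. Here is a minimal sketch of such a driver, assuming gevent's Pool and Queue; the pool size and page range are placeholders, not values from the original post:

from gevent.pool import Pool
from gevent.queue import Queue

failed = Queue()           # save_search_result puts URLs it gave up on here
pool = Pool(20)            # placeholder pool size

for page in range(1, 11):  # placeholder page range
    pool.spawn(save_search_result, page, failed)
pool.join()

# Anything left in the queue exhausted its retries and can be re-crawled.
while not failed.empty():
    print(failed.get())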