def _parse_usable_page(response):
    """Parse ``response`` and return the soup, or ``None`` if it should be retried.

    A response is treated as unusable when its status code is 400 or 404,
    when the parsed document has no ``<title>``, or when the title text
    contains "400".
    """
    if response.status_code in (400, 404):
        return None
    soup = bs(response.content, "lxml")
    title_tag = soup.title
    # Compare against the title *text*. Assuming ``bs`` is BeautifulSoup,
    # ``"400" in tag`` tests membership in the tag's children instead of doing
    # a substring search, and raises TypeError when there is no <title>.
    if title_tag is None or "400" in title_tag.text:
        return None
    return soup


def get_pic(url, mm_type):
    """Fetch ``url`` and spawn one ``download_pic`` greenlet per numbered page.

    The page is fetched with ``my_get`` and re-fetched (up to 5 retries,
    0.8 s apart) while the response is unusable; if it never becomes usable
    the function gives up silently.

    The page count is read from the element that precedes the pager link
    whose text contains the marker string. When that link is missing, the
    URL, status code and raw body are appended to ``log.txt`` and an error
    line is printed instead.

    Args:
        url: Address of the page; page ``n`` is requested as ``url + "/" + n``.
        mm_type: Passed through unchanged to ``download_pic``.

    Returns:
        None in every case.
    """
    max_retries = 5
    retry_delay = 0.8
    # NOTE(review): this marker looks mis-encoded ('???'); presumably it was
    # the pager's "next page" label -- confirm against the live page.
    next_marker = '???»'

    attempts = 0
    while True:
        response = my_get(url)
        li_soup = _parse_usable_page(response)
        if li_soup is not None:
            break
        attempts += 1
        if attempts > max_retries:
            return
        time.sleep(retry_delay)

    # Spaces are replaced with dashes before the title is handed to
    # download_pic.
    title = li_soup.title.text.replace(' ', '-')

    # Look the pager link up once and reuse it for both the check and the
    # page count.
    next_link = li_soup.find(
        lambda tag: tag.name == 'a' and next_marker in tag.text)

    if next_link is None:
        # Record enough to inspect the unexpected page later.
        with open("log.txt", "a") as fs:
            fs.write(url + "\r\n")
            fs.write(str(response.status_code) + "\r\n")
            fs.write(response.content + "\r\n")
        print("error" + url)
        return

    # The sibling just before the pager link holds the number of the last page.
    total_page = int(next_link.find_previous_sibling().text)
    tasks = [
        gevent.spawn(download_pic, url + "/" + str(page), title, mm_type)
        for page in range(1, total_page + 1)
    ]
    # Block until every page download has finished.
    gevent.joinall(tasks)
评论列表
文章目录