mziSpider.py 文件源码-python代码片段

mziSpider.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

项目：mzituSpider 作者: DarkSand 项目源码文件源码

def _is_error_page(response, soup):
    """Return True when the response looks like an HTTP 400/404 error page.

    A response counts as an error page when its status code is 400 or 404,
    or when the text of the parsed document's <title> contains "400".
    """
    if response.status_code in (400, 404):
        return True
    # Compare against the title *text*: ``"400" in tag`` would test the tag's
    # child nodes instead, and a document without <title> yields None here.
    return soup.title is not None and "400" in soup.title.text


def get_pic(url, mm_type):
    """Fetch a gallery page and spawn one download greenlet per sub-page.

    The page at ``url`` is requested through ``my_get``; while the response
    looks like an error page the request is repeated (5 retries, 0.8 s apart)
    and the function gives up silently once the retries are exhausted.

    On a usable page the number of sub-pages is read from the element that
    precedes the pager's marker link, and ``download_pic`` is spawned for
    ``url + "/" + page`` with page running from 1 to that number. The call
    blocks until every spawned greenlet has finished.

    If the page has no <title> or no pager marker link, the URL, the status
    code and the raw body are appended to ``log.txt`` and nothing is
    downloaded.

    :param url: address of the gallery page (without the page suffix).
    :param mm_type: passed through unchanged to ``download_pic``.
    :return: None
    """
    max_retries = 5
    retry_delay = 0.8
    # NOTE(review): this marker looks mis-encoded (the leading characters are
    # literal question marks); presumably it was the pager's "next page" link
    # text. Confirm against the live page before changing it.
    next_marker = '???»'

    response = my_get(url)
    soup = bs(response.content, "lxml")
    retries = 0
    while _is_error_page(response, soup):
        retries += 1
        if retries > max_retries:
            return
        time.sleep(retry_delay)
        response = my_get(url)
        soup = bs(response.content, "lxml")

    # Look the pager link up once and reuse it for both the check and the
    # page count.
    next_link = soup.find(
        lambda tag: tag.name == 'a' and next_marker in tag.text)

    if soup.title is None or next_link is None:
        with open("log.txt", "a") as fs:
            fs.write(url + "\r\n")
            fs.write(str(response.status_code) + "\r\n")
            fs.write(response.content + "\r\n")
        # Parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("error" + url)
        return

    title = soup.title.text.replace(' ', '-')
    # The sibling just before the marker link carries the last page number.
    total_page = int(next_link.find_previous_sibling().text)
    tasks = [gevent.spawn(download_pic, url + "/" + str(page), title, mm_type)
             for page in range(1, total_page + 1)]
    gevent.joinall(tasks)