search_result.py source code


Project: web_develop    Author: dongweiming
# Imports this snippet appears to rely on. The project-local names
# (Proxy, DoesNotExist, fetch, save_article, SEARCH_URL, SEARCH_TEXT)
# are assumed to come from web_develop's own modules.
from gevent import GreenletExit, sleep  # sleep assumed to be gevent's cooperative sleep

from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError, Timeout


def save_search_result(p, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, p)

    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        try:
            # Drop the proxy that just failed so it is not picked again.
            # A separate name avoids shadowing the page number `p`.
            failed_proxy = Proxy.objects.get(address=proxy)
            if failed_proxy:
                failed_proxy.delete()
        except DoesNotExist:
            pass

        # Retry with the page number, not the already-formatted URL.
        return save_search_result(p, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # No results block: the proxy was probably blocked, so retry.
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        return save_search_result(p, queue, retry)
    articles = results.find_all(
        'div', class_=lambda c: c and 'wx-rb' in c)
    for article in articles:
        save_article(article)
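
The function is written to be driven by gevent greenlets that share a queue of URLs that ultimately failed. A minimal driver might look like the sketch below; it is a hypothetical illustration, not part of search_result.py, and the names crawl_all_pages, page_count, and concurrency, as well as the Pool/Queue wiring, are assumptions.

# Hypothetical driver sketch: spawn one greenlet per result page and
# collect the URLs that save_search_result() gave up on.
from gevent.pool import Pool
from gevent.queue import Queue

def crawl_all_pages(page_count=10, concurrency=5):
    pool = Pool(concurrency)   # cap the number of concurrent greenlets
    failed = Queue()           # save_search_result() puts failed URLs here
    for page in range(1, page_count + 1):
        pool.spawn(save_search_result, page, failed)
    pool.join()                # wait until every page is saved or abandoned
    return [failed.get() for _ in range(failed.qsize())]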