search.py 文件源码

python
阅读 35 收藏 0 点赞 0 评论 0

项目:Bluto 作者: darryllane 项目源码 文件源码
def doc_bing(domain, user_agents, prox, q):
    """Search Bing for office/PDF documents hosted on *domain*.

    Requests Bing result pages with ``first`` offsets 1, 11, ..., 291,
    extracts the link of every ``li.b_algo`` result, URL-unquotes it and
    finally puts the collected list on *q*.

    Args:
        domain: Domain name inserted into the ``site:`` search operator.
        user_agents: Sequence of User-Agent strings; one is picked at
            random for each request.
        prox: When equal to ``True``, HTTP traffic is routed through the
            local proxy at ``http://127.0.0.1:8080``.
        q: Object with a ``put`` method (presumably a queue shared with
            the caller -- confirm against the call site) that receives
            the list of document URLs.

    Returns:
        None. The result list is delivered through ``q.put``.

    Request or parsing failures for one page are reported (except for
    chunked-encoding errors, which are skipped silently) and the search
    carries on with the next page.
    """
    document_list = []
    info('Bing Document Search Started')

    # Loop-invariant request settings are built once up front.
    link = 'http://www.bing.com/search'
    query = 'filetype:(doc dot docx docm dotx dotm docb xls xlt xlm xlsx xlsm xltx xltm xlsb xla xlam xll xlw ppt pot pps pptx pptm potx potm ppam ppsx ppsm sldx sldm pub pdf) site:{}'.format(domain)
    # proxies=None is the requests default, i.e. a direct connection.
    proxies = {'http': 'http://127.0.0.1:8080'} if prox == True else None

    for start in range(1, 300, 10):
        try:
            headers = {"Connection": "close",
                       "User-Agent": random.choice(user_agents),
                       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                       'Accept-Language': 'en-US,en;q=0.5',
                       'Accept-Encoding': 'gzip, deflate'}
            payload = {'q': query, 'first': start}
            response = requests.get(link, headers=headers, proxies=proxies,
                                    params=payload, verify=False)

            soup = BeautifulSoup(response.text, "lxml")

            for result in soup.findAll('li', {'class': 'b_algo'}):
                heading = result.find('h2')
                anchor = heading.find('a', href=True) if heading is not None else None
                if anchor is None:
                    # Skip only this malformed entry; previously the
                    # resulting exception discarded the rest of the page.
                    continue
                document_list.append(urllib2.unquote(anchor['href']))
        except requests.models.ChunkedEncodingError:
            continue
        except Exception:
            traceback.print_exc()
            continue

    info('Bing Document Search Finished')
    q.put(document_list)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号