middlewares.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:Spider 作者: poluo 项目源码 文件源码
def process_request(self, request, spider):
    """Render the request through Selenium or decorate it with request headers.

    Behaviour is selected by two flags read from ``request.meta``:

    * ``selenium`` truthy: load ``request.url`` in ``self.driver``, wait for
      the fans-rank list link to be present, and return the rendered page
      as an ``HtmlResponse``.
    * otherwise: set a fixed group of browser-like headers on the request.
      If ``cookies`` is also truthy, the request is additionally marked as
      an XMLHttpRequest, gets its ``referer`` header from
      ``request.meta['referer']`` and receives the middleware's cookies
      extended with the two ``_dys_*`` values taken from ``request.meta``.

    Args:
        request: The request being processed; its ``meta``, ``headers``,
            ``url`` and ``cookies`` attributes are used.
        spider: Unused.

    Returns:
        An ``HtmlResponse`` built from the driver's page source when the
        ``selenium`` flag is set, otherwise ``None``.

    Raises:
        KeyError: If the ``cookies`` flag is set but ``request.meta`` has
            no ``'referer'`` entry.
    """
    # ``dict.get`` does not raise for a missing key, so no exception
    # handling is needed around the flag lookups.
    if request.meta.get('selenium'):
        self.driver.get(request.url)
        # Block (timeout argument: 10) until the fans-rank link exists in
        # the DOM before taking the page source.
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR,
                 "#js-fans-rank > div > div.f-con > div.f-cn.cur > ul > li> a"))
        )
        return HtmlResponse(
            url=self.driver.current_url,
            body=self.driver.page_source,
            request=request,
            encoding='utf8',
        )

    headers = request.headers
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
    headers['Accept'] = '*/*'
    headers['Accept-Encoding'] = 'gzip, deflate, sdch, br'
    headers['Accept-Language'] = 'zh-CN,zh;q=0.8,zh-TW;q=0.6'
    headers['Connection'] = 'keep-alive'
    headers['Host'] = 'www.douyu.com'

    if not request.meta.get('cookies'):
        # Only requests without the ``cookies`` flag carry this header.
        headers['Upgrade-Insecure-Requests'] = 1
        return None

    headers['DNT'] = '1'
    headers['X-Requested-With'] = 'XMLHttpRequest'
    headers['referer'] = request.meta['referer']

    # The per-request values are still recorded on the middleware's cookie
    # dict, as before.
    self.cookies['_dys_lastPageCode'] = request.meta.get('_dys_lastPageCode')
    self.cookies['_dys_refer_action_code'] = request.meta.get('_dys_refer_action_code')
    # Hand the request its own snapshot: sharing ``self.cookies`` itself
    # would let a later request overwrite the cookies of this one.
    request.cookies = dict(self.cookies)
    return None
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号