middlewares.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:tianyancha 作者: Range0122 项目源码 文件源码
def process_request(self, request, spider):
        """Render selected requests in PhantomJS and return the rendered page.

        Only requests whose URL has the character ``'c'`` at index 26 are
        handled here (presumably this selects one page type on the target
        site -- confirm against the spider's URL patterns). Every other
        request, including URLs too short to have an index 26, yields
        ``None``.

        For a matching request a fresh PhantomJS driver is started with a
        user agent picked at random from ``self.user_agent_list`` and with
        image loading disabled. The page is loaded, the method waits a
        random 15-22 seconds, tries to click the "show detail" link, and
        then wraps the resulting page source in an ``HtmlResponse``.

        Args:
            request: The request being processed; only ``request.url`` is read.
            spider: Unused.

        Returns:
            An ``HtmlResponse`` built from the driver's current URL and page
            source for matching requests, otherwise ``None``.
        """
        # Slice instead of indexing so that URLs shorter than 27 characters
        # simply do not match rather than raising IndexError.
        if request.url[26:27] != 'c':
            return None

        ua = random.choice(self.user_agent_list)
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = ua
        dcap["phantomjs.page.settings.loadImages"] = False
        # Raw string: same path as before, without relying on invalid escape
        # sequences such as "\W" and "\p".
        driver = webdriver.PhantomJS(
            executable_path=r'E:\Webdriver\phantomjs-2.1.1-windows\bin\phantomjs.exe',
            desired_capabilities=dcap)
        try:
            driver.get(request.url)
            # Randomised wait before reading the page.
            sleep_time = random.randint(15, 22)
            time.sleep(sleep_time)
            try:
                detail = driver.find_element_by_xpath('//a[@ng-click="showDetail = btnOnClick(showDetail)"]')
                detail.click()
            except Exception:
                # Best effort: a missing or unclickable detail link is not an
                # error. Unlike a bare ``except``, this still lets
                # KeyboardInterrupt and SystemExit propagate.
                pass
            body = driver.page_source
            url = driver.current_url
        finally:
            # Always shut the browser down, even when loading or reading the
            # page fails, so PhantomJS processes do not pile up.
            driver.quit()
        return HtmlResponse(url=url, body=body, request=request, encoding='utf-8')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号