middlewares.py 文件源码

python
阅读 31 收藏 0 点赞 0 评论 0

项目:fintech_spider 作者: hee0624 项目源码 文件源码
def process_request(self, request, spider):
        """Render requests of the "gsxt" spider in a real Chrome browser.

        For the spider named "gsxt" the URL is loaded through Selenium, the
        page is scrolled down so that script-driven content gets a chance to
        load, and the resulting DOM is returned as an ``HtmlResponse``. For
        every other spider nothing is done and ``None`` is returned.

        Args:
            request: The request being processed; only ``request.url`` is
                read here, and the object is attached to the response.
            spider: The spider that issued the request; only ``spider.name``
                is read.

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and page
            source for the "gsxt" spider, otherwise ``None``.
        """
        if spider.name != "gsxt":
            # Nothing to render for other spiders.
            return None

        # NOTE(review): the chromedriver location is machine-specific; it
        # presumably should come from project settings -- confirm.
        # PhantomJS was also marked as working in the original notes:
        #   webdriver.PhantomJS(r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
        driver = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")

        # Disabled sketch for routing the browser through an IP proxy. The
        # original explanatory comments were lost to an encoding error, so
        # only the code is preserved here for reference:
        #
        #   proxy = webdriver.Proxy()
        #   proxy.proxy_type = ProxyType.MANUAL
        #   req = requests.get("http://datazhiyuan.com:60001/plain", timeout=10)
        #   print("Get an IP proxy:", req.text)
        #   if req.text:
        #       proxy.http_proxy = req.text  # "1.9.171.51:800"
        #   proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        #   driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        try:
            driver.get(request.url)
            time.sleep(2)   # give the initial page time to load

            # Scroll far down the page; presumably this triggers lazily
            # loaded content -- the original comment is unreadable.
            scroll_js = "var q=document.documentElement.scrollTop=10000"
            driver.execute_script(scroll_js)
            time.sleep(3)   # wait for whatever the scroll triggered

            body = driver.page_source
            final_url = driver.current_url
            # The "??" prefix is a garbled (non-ASCII) label from the original
            # file; left untouched because its intended text is unknown.
            print("??" + request.url)
        finally:
            # Always shut the browser down: a new driver is started for every
            # request, so without this each request leaks a Chrome process.
            driver.quit()

        return HtmlResponse(final_url, body=body, encoding='utf-8', request=request)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号