def process_request(self, request, spider):
    """Render requests from the "gsxt" spider in a real Chrome browser.

    For the spider named "gsxt", the URL is opened in a Selenium-driven
    Chrome instance, the page is scrolled down so that script-loaded content
    has a chance to appear, and the rendered markup is returned wrapped in
    an ``HtmlResponse``. Requests from any other spider are not handled.

    NOTE(review): the signature and ``HtmlResponse`` suggest this is a
    Scrapy downloader-middleware hook, where returning ``None`` lets the
    request continue through normal processing -- confirm against the
    enclosing class.

    Args:
        request: The outgoing request; only ``request.url`` is read here.
        spider: The spider issuing the request; only ``spider.name`` is read.

    Returns:
        An ``HtmlResponse`` built from the browser's final URL and page
        source for the "gsxt" spider, otherwise ``None``.
    """
    if spider.name != "gsxt":
        return None

    # PhantomJS was used here previously; Chrome is the current driver.
    # NOTE: a disabled experiment once fetched a proxy address from
    # http://datazhiyuan.com:60001/plain and applied it as a manual HTTP
    # proxy through webdriver.Proxy / DesiredCapabilities.PHANTOMJS before
    # starting a new session. It is not active.
    driver = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")
    try:
        driver.get(request.url)
        # Give the page time to run its initial scripts.
        time.sleep(2)
        # Scroll far down the document so that lazily loaded content is
        # requested, then wait for it to arrive.
        scroll_js = "var q=document.documentElement.scrollTop=10000"
        driver.execute_script(scroll_js)
        time.sleep(3)
        body = driver.page_source
        print("??" + request.url)
        # current_url is read before the browser is shut down in `finally`.
        return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)
    finally:
        # Always shut the browser down; otherwise every request leaks a
        # Chrome/chromedriver process, including when navigation fails.
        driver.quit()
评论列表
文章目录