isp_data_pollution.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:isp-data-pollution 作者: essandess 项目源码 文件源码
def open_driver(self):
        """Start a fresh PhantomJS webdriver and store it on ``self.driver``.

        Any existing driver is first shut down through ``self.quit_driver()``.
        The new driver is configured from ``self.user_agent``, ``self.timeout``,
        ``self.proxy`` (skipped when ``None``) and ``self.phantomjs_binary_path``
        (when ``None``, Selenium's default executable lookup is used).

        If configuring the freshly started driver fails, the driver is quit
        before the exception propagates, so that no PhantomJS process is left
        running without a reference to it.
        """
        self.quit_driver()
        # NOTE(review): quit_driver() is not visible here; this guard only
        # skips re-creation when self.driver is still a PhantomJS WebDriver.
        if not hasattr(self, 'driver') or not isinstance(self.driver,webdriver.phantomjs.webdriver.WebDriver):
            # phantomjs driver
            # http://engineering.shapesecurity.com/2015/01/detecting-phantomjs-based-visitors.html
            # https://coderwall.com/p/9jgaeq/set-phantomjs-user-agent-string
            # http://phantomjs.org/api/webpage/property/settings.html
            # http://stackoverflow.com/questions/23390974/phantomjs-keeping-cache
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap['phantomjs.page.settings.userAgent'] = self.user_agent
            dcap['phantomjs.page.settings.loadImages'] = 'false'
            dcap['phantomjs.page.settings.clearMemoryCaches'] = 'true'
            # self.timeout is scaled by 1000 for this setting, with a floor of 2000
            dcap['phantomjs.page.settings.resourceTimeout'] = max(2000,int(self.timeout * 1000))
            dcap['acceptSslCerts'] = True
            dcap['applicationCacheEnabled'] = True
            dcap['handlesAlerts'] = False
            dcap['phantomjs.page.customHeaders'] = { 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate, sdch' }

            # the proxy option, when configured, goes ahead of the fixed options
            phantomjs_service_args = []
            if self.proxy is not None:
                phantomjs_service_args.append('--proxy={}'.format(self.proxy))
            phantomjs_service_args += ['--disk-cache=false','--ignore-ssl-errors=false','--ssl-protocol=TLSv1.2']

            if self.phantomjs_binary_path is None:
                driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=phantomjs_service_args)
            else:
                driver = webdriver.PhantomJS(self.phantomjs_binary_path,desired_capabilities=dcap,service_args=phantomjs_service_args)

            try:
                driver.set_window_size(1296,1018)   # Tor browser size on Linux
                driver.implicitly_wait(self.timeout)
                driver.set_page_load_timeout(self.timeout)
                driver.set_script_timeout(self.timeout)
            except Exception:
                # The browser process is already running but not yet reachable
                # through self.driver; shut it down here or it is orphaned.
                try:
                    driver.quit()
                except Exception:
                    pass  # best-effort cleanup; the original error is re-raised
                raise
            self.driver = driver
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号