BrowserPhantomjs.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:jtyd_python_spider 作者: xtuyaowu 项目源码 文件源码
def visit(self, url, xpath=None, timeout=60, retry=1, load_images=False, **kwargs):
    """Open ``url`` in a fresh PhantomJS browser and return the page source.

    Any browser left over from a previous call is quit first. The new
    browser is stored on ``self.browser`` so that the next call (or the
    owner of this object) can dispose of it.

    Args:
        url: Address to load.
        xpath: Optional XPath expression. When given, the method looks the
            element up after loading, with an implicit wait of ``timeout``
            seconds, so that late-rendered content has a chance to appear.
            A failed lookup is reported but does not abort the visit.
        timeout: Implicit-wait duration in seconds; only used with ``xpath``.
        retry: Number of additional ``get`` attempts after the first one
            fails (``retry + 1`` attempts in total).
        load_images: When false, PhantomJS is started with
            ``--load-images=false``.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        The page source, or ``None`` when the browser could not be started
        or the page source is the empty ``<html>`` skeleton.
    """
    # Dispose of the previous browser and forget it even if quit() raises,
    # so a dead driver is never quit a second time on a later call.
    if self.browser:
        try:
            self.browser.quit()
        finally:
            self.browser = None

    # Work on a copy: DesiredCapabilities.PHANTOMJS is a shared, module-wide
    # dict, and updating it in place would leak this user agent into every
    # other PhantomJS session created in the process.
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities['phantomjs.page.settings.userAgent'] = self.ua if self.ua else 'Mozilla/5.0 (Windows NT 6.1; rv:42.0) Gecko/20100101 Firefox/42.0'

    service_args = []
    if not load_images:
        service_args.append('--load-images=false')
    if self.proxy:
        service_args.append('--proxy=%s' % self.proxy)

    try:
        browser = webdriver.PhantomJS(service_args=service_args if service_args else None,
                                      desired_capabilities=capabilities)
    except Exception as e:
        print(str(e))
        return None
    # Register the browser immediately so it can still be quit by a later
    # call if anything below raises unexpectedly.
    self.browser = browser

    # Best-effort loading: stop at the first successful get(); if every
    # attempt fails, carry on and let the blank-page check below decide.
    for _attempt in range(retry + 1):
        try:
            browser.get(url)
            break
        except Exception as e:
            print(str(e))

    if xpath:
        browser.implicitly_wait(timeout)
        try:
            browser.find_element_by_xpath(xpath)
        except Exception as e:
            print(str(e))

    result = browser.page_source
    # An empty skeleton document is treated as "nothing loaded".
    return result if result != '<html><head></head><body></body></html>' else None
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号