html_downloader.py 文件源码-python代码片段

html_downloader.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

项目：wechat_spider 作者: CoolWell 项目源码文件源码

def download_articles_ph(self, url):
        '''
        Load a page in a PhantomJS browser and return the rendered HTML.

        A new PhantomJS driver is started for every call, configured with
        the module-level ``UA`` string as user agent, screenshots turned
        off and image loading disabled. The driver is always shut down
        before this method returns once it has been created.

        :param url: address of the page to load; ``None`` is accepted and
            short-circuits to ``None``.
        :return: the page source reported by the driver, or ``None`` when
            ``url`` is ``None``, the driver could not be started, or the
            page load failed. Failures are printed (timestamp, url, error)
            rather than raised.
        '''
        if url is None:
            return None

        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = UA
        dcap["takesScreenshot"] = False

        try:
            driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no'])
        except Exception as e:
            # Best effort: report the start-up failure and give up on this url.
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None

        try:
            driver.set_page_load_timeout(30)
            driver.get(url)
            # Short pause before reading the source -- presumably to let
            # page scripts finish; confirm before changing the delay.
            time.sleep(1)
            return driver.page_source
        except Exception as e:
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate, and log the cause like the
            # driver start-up handler does.
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None
        finally:
            # Runs on success, failure and interruption alike so the
            # PhantomJS process is not left behind.
            driver.quit()