python类PHANTOMJS的实例源码

M_GetVideoInfo_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def main(number):
    """Load the bilibili video page ``av<number>`` in PhantomJS and pass it to ``getInfo``.

    The rendered page source is parsed with BeautifulSoup (``lxml`` parser)
    and the resulting soup is handed to ``getInfo``.  Any exception raised
    while loading or processing the page is logged and not propagated.

    :param number: video id; converted with ``str()`` and inserted into the URL.
    """
    import logging

    url = 'http://www.bilibili.com/video/av' + str(number) + '/'
    # Work on a copy so the shared DesiredCapabilities.PHANTOMJS dict is not modified.
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False
    # NOTE(review): machine-specific location of phantomjs.exe.
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        try:
            driver.get(url)
            content = driver.page_source
        finally:
            # Shut the browser down exactly once, whether or not loading succeeded.
            driver.quit()
        soup = BeautifulSoup(content, 'lxml')
        getInfo(soup)
    except Exception:
        # Still best-effort, but the failure is no longer invisible.
        logging.getLogger(__name__).exception("failed to process %s", url)
GetVideoInfo_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def getSoup(start, stop):
    """Load the bilibili video pages ``av<start>`` .. ``av<stop>`` and pass each to ``getInfo``.

    A new PhantomJS instance is started for every id.  As before, the first
    exception stops the remaining ids and is not propagated; it is now logged,
    and the browser in use is always shut down.

    :param start: first video id (inclusive).
    :param stop: last video id (inclusive).
    """
    import logging

    try:
        for number in range(start, stop + 1):
            url = 'http://www.bilibili.com/video/av'+str(number)+'/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE(review): machine-specific location of phantomjs.exe.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Without this the PhantomJS process leaked whenever get() raised.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception:
        logging.getLogger(__name__).exception("video page scan aborted")
webdriver_item.py 文件源码 项目:spoon 作者: Jiramew 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_webdriver(self):
        """Build a PhantomJS driver from the settings in ``self.webdriver_config``.

        Reads ``proxy``, ``header``, ``phantomjs_path`` and ``timeout`` from the
        config object.  Proxy switches are passed only when a proxy is set, and
        the ``User-Agent`` entry of ``header`` is applied when ``header`` is set.

        :return: the new driver, with its page-load timeout already applied.
        """
        cfg = self.webdriver_config

        if cfg.proxy:
            phantom_args = [
                "--proxy=" + cfg.proxy,
                "--proxy-type=http",
                "--ignore-ssl-errors=true",
            ]
        else:
            phantom_args = []

        caps = dict(DesiredCapabilities.PHANTOMJS)
        if cfg.header:
            # The same value goes into both the page setting and the custom header.
            user_agent = cfg.header['User-Agent']
            caps["phantomjs.page.settings.userAgent"] = user_agent
            caps["phantomjs.page.customHeaders.User-Agent"] = user_agent
        caps["takesScreenshot"] = True

        phantom = webdriver.PhantomJS(
            cfg.phantomjs_path,
            service_args=phantom_args,
            desired_capabilities=caps,
        )
        phantom.set_page_load_timeout(cfg.timeout)
        return phantom
middlewares.py 文件源码 项目:tianyancha 作者: Range0122 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def process_request(self, request, spider):
        """Render selected requests in PhantomJS and return the rendered page.

        Only requests whose URL has the character ``'c'`` at index 26 are
        handled; for any other request the method returns ``None``.

        :param request: the request being processed; ``request.url`` is loaded.
        :param spider: not used by this method.
        :return: an ``HtmlResponse`` built from the browser's final URL and page
            source, or ``None`` when the request is not handled here.
        """
        # NOTE(review): url[26] raises IndexError for URLs shorter than 27 chars.
        if request.url[26] != 'c':
            return None

        ua = random.choice(self.user_agent_list)
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = ua
        dcap["phantomjs.page.settings.loadImages"] = False
        # Raw string: same value as before, without relying on invalid escapes (\W, \p).
        driver = webdriver.PhantomJS(
            executable_path=r'E:\Webdriver\phantomjs-2.1.1-windows\bin\phantomjs.exe',
            desired_capabilities=dcap)
        try:
            driver.get(request.url)
            sleep_time = random.randint(15, 22)
            time.sleep(sleep_time)
            try:
                detail = driver.find_element_by_xpath('//a[@ng-click="showDetail = btnOnClick(showDetail)"]')
                detail.click()
            except Exception:
                # Failure to find or click the link is ignored; the page is used as-is.
                pass
            body = driver.page_source
            url = driver.current_url
        finally:
            # Always stop PhantomJS, even when loading the page raised.
            driver.quit()
        return HtmlResponse(url=url, body=body, request=request, encoding='utf-8')
ebookjapan.py 文件源码 项目:mindl-legacy 作者: MinoMino 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self, url):
        """Store the book URL and start the PhantomJS session used for it.

        Sets ``book_name`` to ``"N/A"`` and ``book_volume`` to ``None``, creates
        the driver as ``self.d`` with a custom user agent, adds two cookies for
        ``.ebookjapan.jp``, sets the window size and creates ``self.wait``, a
        ``WebDriverWait`` with a 60 second timeout.

        :param url: stored unchanged as ``self.url``.
        """
        self.url = url
        self.book_name = "N/A"
        self.book_volume = None

        # Replace the default user agent with the module-level USER_AGENT.
        caps = dict(DC.PHANTOMJS)
        caps["phantomjs.page.settings.userAgent"] = USER_AGENT
        phantom_flags = [
            "--ignore-ssl-errors=true",
            "--ssl-protocol=any",
            "--web-security=false",
            "--ssl-protocol=TLSv1",
        ]
        self.d = webdriver.PhantomJS(desired_capabilities=caps,
            service_args=phantom_flags)

        # Cookies meant to make the site treat the terms of service as already accepted.
        preset_cookies = (
            ("tachiyomi_auto_reader", "Browser"),
            ("tachiyomi_user_policy", "on"),
        )
        for cookie_name, cookie_value in preset_cookies:
            self.d.add_cookie({"name": cookie_name, "value": cookie_value,
                               "domain": ".ebookjapan.jp", "path": "/"})
        self.d.set_window_size(1120, 550)

        # Shared waiter for later page interactions.
        self.wait = WebDriverWait(self.d, 60)
comic.py 文件源码 项目:ComicSpider 作者: QuantumLiu 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def get_pages(self):
        '''
        Get all pages' urls of the chapter using selenium and PhantomJS.

        Loads ``self.chapter_url``, takes the first page-selector block
        (``onchange="select_page()"``) and extracts its ``<option>`` entries.
        On any ``Exception`` the traceback is printed and ``self.pages`` is
        set to an empty list.  ``KeyboardInterrupt`` now really propagates:
        the previous ``return`` inside ``finally`` discarded it.

        return:
            a list of tuple (page_num,page_url)
        '''
        r_slt=r'onchange="select_page\(\)">([\s\S]*?)</select>'
        r_p=r'<option value="(.*?)".*?>?(\d*?)?<'
        # Bound up front so the cleanup below is safe when the constructor fails.
        driver = None
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            # Images are not needed to read the page selector.
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.get(self.chapter_url)
            text=driver.page_source
            st=re.findall(r_slt,text)[0]
            self.pages = [(int(p[-1]),p[0]) for p in re.findall(r_p,st)]
        except Exception:
            traceback.print_exc()
            self.pages = []
        finally:
            if driver is not None:
                driver.quit()
        print('Got {l} pages in chapter {ch}'.format(l=len(self.pages),ch=self.chapter_title))
        return self.pages
selenium.py 文件源码 项目:SerpScrap 作者: ecoron 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def _get_PhantomJS(self):
        """Start a PhantomJS driver and store it in ``self.webdriver``.

        Proxy switches are derived from ``self.proxy`` when it is set (with
        ``--proxy-auth`` only if both username and password are present), and a
        random non-mobile user agent is applied.

        :return: ``True`` when the driver was created; ``False`` when creation
            raised a connection error or a ``WebDriverException`` (both logged).
        """
        try:
            phantom_args = []

            if self.proxy:
                phantom_args.append(
                    '--proxy={}:{}'.format(self.proxy.host, self.proxy.port))
                phantom_args.append(
                    '--proxy-type={}'.format(self.proxy.proto))

                if self.proxy.username and self.proxy.password:
                    credentials = '{}:{}'.format(
                        self.proxy.username,
                        self.proxy.password
                    )
                    phantom_args.append('--proxy-auth=' + credentials)

            useragent = random_user_agent(mobile=False)
            logger.info('useragent: {}'.format(useragent))
            caps = dict(DesiredCapabilities.PHANTOMJS)
            caps["phantomjs.page.settings.userAgent"] = useragent
            try:
                self.webdriver = webdriver.PhantomJS(
                    executable_path=self.config['executable_path'],
                    service_args=phantom_args,
                    desired_capabilities=caps
                )
            except (ConnectionError, ConnectionRefusedError, ConnectionResetError) as err:
                logger.error(err)
                return False
            else:
                return True
        except WebDriverException as e:
            logger.error(e)
        return False
GetUserFansId.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def getSoup(start, stop):
    """Collect user name and fan data for the bilibili space ids ``start`` .. ``stop``.

    For every id the space page is rendered in PhantomJS, the user name is
    taken from ``getInfo``, the fan ids and fan count come from
    ``GetFansUid.GetFansUid(number).get_uids()``, and the record is stored
    with ``saveData``.

    An id that fails prints ``"get page error"`` and is skipped.  This used to
    be done by recursing with ``stop+1``, which widened the range on every
    failure and could exhaust the recursion limit; the loop now simply
    continues and ends at ``stop``.

    :param start: first user id (inclusive).
    :param stop: last user id (inclusive).
    """
    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # quit() rather than close(): close() left the PhantomJS process running.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the page number is the user id

            get_fans_uid = GetFansUid.GetFansUid(number)
            fansuid, fansnumber = get_fans_uid.get_uids()
            print(uid, username, fansnumber)

            saveData(uid, username, fansnumber, fansuid)
        except Exception:
            print("get page error")


# ????
M_GetFans_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def main(number):
    """Collect user name and fan data for the bilibili space id ``number``.

    The space page is rendered in PhantomJS and parsed with BeautifulSoup;
    the user name comes from ``getInfo``, the fan ids and fan count from
    ``GetFansUid(number).get_uids()``, and the record is stored with
    ``saveData``.  Any exception is logged and not propagated.

    :param number: user id; also used as the ``uid`` of the stored record.
    """
    import logging

    url = 'http://space.bilibili.com/' + str(number) + '/#!/'
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False
    # NOTE(review): machine-specific location of phantomjs.exe.
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        try:
            driver.get(url)
            content = driver.page_source
        finally:
            # Shut the browser down exactly once, before the slower processing below.
            driver.quit()
        soup = BeautifulSoup(content, 'lxml')
        username = getInfo(soup)
        uid = number  # the page number is the user id
        get_fans_uid = GetFansUid(number)
        fansuid, fansnumber = get_fans_uid.get_uids()

        saveData(uid, username, fansnumber, fansuid)
    except Exception:
        # Still best-effort, but the failure is no longer invisible.
        logging.getLogger(__name__).exception("failed to process %s", url)
GetBilibiliUser_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def getSoup(start, stop):
    """Load the bilibili space pages for ids ``start`` .. ``stop`` and pass each to ``getInfo``.

    A new PhantomJS instance is started for every id.  As before, the first
    exception stops the remaining ids and is not propagated; it is now logged,
    and the browser in use is always shut down.

    :param start: first user id (inclusive).
    :param stop: last user id (inclusive).
    """
    import logging

    try:
        for number in range(start, stop+1):
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE(review): machine-specific location of phantomjs.exe.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Without this the PhantomJS process leaked whenever get() raised.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception:
        logging.getLogger(__name__).exception("space page scan aborted")


# ????
GetFans_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def getSoup(start, stop):
    """Collect user name and fan data for the bilibili space ids ``start`` .. ``stop``.

    For every id the space page is rendered in PhantomJS, the user name is
    taken from ``getInfo``, the fan ids and fan count come from
    ``GetFansUid(number).get_uids()``, and the record is stored with
    ``saveData``.

    An id that fails prints ``"get page error"`` and is skipped.  This used to
    be done by recursing with ``stop+1``, which widened the range on every
    failure and could exhaust the recursion limit; the loop now simply
    continues and ends at ``stop``.

    :param start: first user id (inclusive).
    :param stop: last user id (inclusive).
    """
    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE(review): machine-specific location of phantomjs.exe.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always stop PhantomJS; it leaked before when get() raised.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the page number is the user id
            get_fans_uid = GetFansUid(number)
            fansuid, fansnumber = get_fans_uid.get_uids()

            saveData(uid, username, fansnumber, fansuid)
        except Exception:
            print("get page error")


# ????
GetFollow_Oracle.py 文件源码 项目:danmu-bilibili 作者: saberxxy 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def getSoup(start, stop):
    """Collect user name and follow data for the bilibili space ids ``start`` .. ``stop``.

    For every id the space page is rendered in PhantomJS, the user name is
    taken from ``getInfo``, the followed ids and their count come from
    ``GetFollowUid(number).get_uids()``, and the record is stored with
    ``saveData``.

    An id that fails prints ``"get page error"`` and is skipped.  This used to
    be done by recursing with ``stop+1``, which widened the range on every
    failure and could exhaust the recursion limit; the loop now simply
    continues and ends at ``stop``.

    :param start: first user id (inclusive).
    :param stop: last user id (inclusive).
    """
    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE(review): machine-specific location of phantomjs.exe.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always stop PhantomJS; it leaked before when get() raised.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the page number is the user id
            get_gz_uid = GetFollowUid(number)
            gzsuid, gznumber = get_gz_uid.get_uids()

            saveData(uid, username, gznumber, gzsuid)
        except Exception:
            print("get page error")


# ????
webdriver.py 文件源码 项目:devsecops-example-helloworld 作者: boozallen 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch a PhantomJS / Ghostdriver service and open a driver session on it.

        The service is started first; the remote-driver base class is then
        initialised against the service URL.  If that initialisation raises,
        ``self.quit()`` is called before the exception is re-raised.

        :Args:
         - executable_path - path of the PhantomJS executable, handed to ``Service``.
         - port - port handed to ``Service``.
         - desired_capabilities - capabilities dictionary forwarded to
           ``RemoteWebDriver.__init__``.
         - service_args - forwarded to ``Service`` as ``service_args``.
         - service_log_path - forwarded to ``Service`` as ``log_path``.
        """
        phantom_service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path,
        )
        self.service = phantom_service
        phantom_service.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=phantom_service.service_url,
                desired_capabilities=desired_capabilities,
            )
        except BaseException:
            # Clean up after a failed session start, then let the error through.
            self.quit()
            raise

        self._is_remote = False
test_edit_handler.py 文件源码 项目:wagtailannotatedimage 作者: takeflight 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def setUpClass(cls):
        """Create the PhantomJS browser stored on the class as ``cls.driver``.

        The browser gets a 1920x1080 window and a 10 second implicit wait.
        """
        super(TestEditHandler, cls).setUpClass()
        browser = webdriver.PhantomJS(
            desired_capabilities=DesiredCapabilities.PHANTOMJS)
        cls.driver = browser

        browser.set_window_size(1920, 1080)
        browser.implicitly_wait(10)
fbscrape.py 文件源码 项目:open-source-feeds 作者: mhfowler 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def initialize_driver(self, driver=None):
        """Select or create the selenium driver and store it in ``self.driver``.

        Order of precedence: a remote Chrome session when
        ``self.command_executor`` is set; otherwise PhantomJS or local Chrome
        according to ``self.which_driver``; otherwise the ``driver`` argument.
        A 240 second page-load timeout is applied to the result.

        :param driver: driver to use when none of the built-in choices applies.
        """
        def chrome_options_from_settings():
            # Chrome options shared by the remote and the local Chrome branch.
            opts = Options()
            opts.add_argument("--disable-notifications")
            if self.proxy:
                opts.add_argument('--proxy-server=%s' % self.proxy)
            return opts

        if self.command_executor:
            remote_options = chrome_options_from_settings()
            self.driver = webdriver.Remote(
                command_executor=self.command_executor,
                desired_capabilities=remote_options.to_capabilities()
            )
        elif self.which_driver == 'phantomjs':
            caps = dict(DesiredCapabilities.PHANTOMJS)
            caps["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
                "(KHTML, like Gecko) Chrome/15.0.87"
            )
            phantom = webdriver.PhantomJS(desired_capabilities=caps)
            phantom.set_window_size(1400, 1000)
            self.driver = phantom
        elif self.which_driver == 'chrome':
            local_options = chrome_options_from_settings()
            self.driver = webdriver.Chrome(chrome_options=local_options)
        else:
            # Fall back to whatever the caller supplied.
            self.driver = driver

        self.driver.set_page_load_timeout(time_to_wait=240)
webdriver.py 文件源码 项目:flasky 作者: RoseOu 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch a PhantomJS / Ghostdriver service and open a driver session on it.

        The service is started first; the remote-driver base class is then
        initialised against the service URL.  If that initialisation raises,
        ``self.quit()`` is called before the exception is re-raised.

        :Args:
         - executable_path - path of the PhantomJS executable, handed to ``Service``.
         - port - port handed to ``Service``.
         - desired_capabilities - capabilities dictionary forwarded to
           ``RemoteWebDriver.__init__``.
         - service_args - forwarded to ``Service`` as ``service_args``.
         - service_log_path - forwarded to ``Service`` as ``log_path``.
        """
        phantom_service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path,
        )
        self.service = phantom_service
        phantom_service.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=phantom_service.service_url,
                desired_capabilities=desired_capabilities,
            )
        except BaseException:
            # Clean up after a failed session start, then let the error through.
            self.quit()
            raise

        self._is_remote = False
isp_data_pollution.py 文件源码 项目:isp-data-pollution 作者: essandess 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def open_driver(self):
        """Create a PhantomJS driver and store it in ``self.driver``.

        ``self.quit_driver()`` is called first.  Nothing more happens when
        ``self.driver`` already is a PhantomJS ``WebDriver`` instance;
        otherwise a driver is created with the capabilities and command-line
        switches below and its window size and timeouts are set from
        ``self.timeout``.
        """
        self.quit_driver()
        if hasattr(self, 'driver') and isinstance(self.driver,webdriver.phantomjs.webdriver.WebDriver):
            return

        # phantomjs driver
        # http://engineering.shapesecurity.com/2015/01/detecting-phantomjs-based-visitors.html
        # https://coderwall.com/p/9jgaeq/set-phantomjs-user-agent-string
        # http://phantomjs.org/api/webpage/property/settings.html
        # http://stackoverflow.com/questions/23390974/phantomjs-keeping-cache
        caps = dict(DesiredCapabilities.PHANTOMJS)
        caps.update({
            'phantomjs.page.settings.userAgent': self.user_agent,
            'phantomjs.page.settings.loadImages': 'false',
            'phantomjs.page.settings.clearMemoryCaches': 'true',
            # Resource timeout in milliseconds, never below 2000.
            'phantomjs.page.settings.resourceTimeout': max(2000,int(self.timeout * 1000)),
            'acceptSslCerts': True,
            'applicationCacheEnabled': True,
            'handlesAlerts': False,
            'phantomjs.page.customHeaders': {
                'Connection': 'keep-alive',
                'Accept-Encoding': 'gzip, deflate, sdch',
            },
        })

        phantom_args = ['--disk-cache=false','--ignore-ssl-errors=false','--ssl-protocol=TLSv1.2']
        if self.proxy is not None:
            # The proxy switch goes in front of the fixed switches.
            phantom_args.insert(0, '--proxy={}'.format(self.proxy))

        if self.phantomjs_binary_path is None:
            phantom = webdriver.PhantomJS(desired_capabilities=caps,service_args=phantom_args)
        else:
            phantom = webdriver.PhantomJS(self.phantomjs_binary_path,desired_capabilities=caps,service_args=phantom_args)

        phantom.set_window_size(1296,1018)   # Tor browser size on Linux
        phantom.implicitly_wait(self.timeout)
        phantom.set_page_load_timeout(self.timeout)
        phantom.set_script_timeout(self.timeout)
        self.driver = phantom
webdriver.py 文件源码 项目:leetcode 作者: thomasyimgit 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch a PhantomJS / Ghostdriver service and open a driver session on it.

        The service is started first; the remote-driver base class is then
        initialised against the service URL.  If that initialisation raises an
        ``Exception``, ``self.quit()`` is called before it is re-raised.

        :Args:
         - executable_path - path of the PhantomJS executable, handed to ``Service``.
         - port - port handed to ``Service``.
         - desired_capabilities - capabilities dictionary forwarded to
           ``RemoteWebDriver.__init__``.
         - service_args - forwarded to ``Service`` as ``service_args``.
         - service_log_path - forwarded to ``Service`` as ``log_path``.
        """
        phantom_service = Service(executable_path, port=port,
                                  service_args=service_args,
                                  log_path=service_log_path)
        self.service = phantom_service
        phantom_service.start()

        try:
            RemoteWebDriver.__init__(self,
                                     command_executor=phantom_service.service_url,
                                     desired_capabilities=desired_capabilities)
        except Exception:
            # Clean up after a failed session start, then let the error through.
            self.quit()
            raise

        self._is_remote = False
drivers.py 文件源码 项目:cabu 作者: thylong 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def load_phantomjs(config):
    """Start PhantomJS webdriver with the given configuration.

    When ``HTTPS_PROXY`` or ``HTTP_PROXY`` is set in the environment (the
    former takes precedence), its value is passed to PhantomJS as an HTTP
    proxy.  A leading ``scheme://`` is removed from the value if present, so
    ``http://host:port``, ``https://host:port`` and plain ``host:port`` are
    all accepted.  Custom headers are applied through ``Headers`` when
    ``config['HEADERS']`` is set.

    Args:
        config (dict): The configuration loaded previously in Cabu.

    Returns:
        webdriver (selenium.webdriver): An instance of phantomJS webdriver.

    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    service_args = [
        '--ignore-ssl-errors=true',
        '--ssl-protocol=any',
        '--web-security=false'
    ]

    # `or` instead of a .get() default: an empty HTTPS_PROXY must not hide HTTP_PROXY.
    proxy_address = os.environ.get('HTTPS_PROXY') or os.environ.get('HTTP_PROXY')
    if proxy_address:
        # The old pattern only matched "http://" and crashed on anything else.
        proxy_ip = re.sub(r'^\w+://', '', proxy_address)
        service_args.append('--proxy=%s' % proxy_ip)
        service_args.append('--proxy-type=http')

    if 'HEADERS' in config and config['HEADERS']:
        dcap = Headers(config).set_headers(dcap)

    return webdriver.PhantomJS(
        desired_capabilities=dcap,
        service_args=service_args,
        service_log_path=os.path.devnull
    )
test_headers.py 文件源码 项目:cabu 作者: thylong 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def test_phantomjs_headers_loading(self):
        # Headers.set_headers must put the expected User-Agent into the
        # PhantomJS capabilities.  assertEqual replaces the deprecated
        # assertEquals alias, which no longer exists in Python 3.12.
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        headers = Headers(self.config).set_headers(dcap)
        self.assertEqual(
            headers['phantomjs.page.customHeaders.User-Agent'],
            'Mozilla/6.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36'
            ' (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36'
        )
test_headers.py 文件源码 项目:cabu 作者: thylong 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def test_chrome_headers_loading(self):
        # After DRIVER_NAME is switched to 'Chrome', building Headers and
        # applying them to PhantomJS capabilities is expected to raise.
        self.app.config['DRIVER_NAME'] = 'Chrome'
        phantom_caps = dict(DesiredCapabilities.PHANTOMJS)
        with self.assertRaises(Exception):
            header_loader = Headers(self.config)
            header_loader.set_headers(phantom_caps)
BaseCrawler.py 文件源码 项目:selenium-image-crawler 作者: scirag 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def create_selenium_driver(self):
        """Create a PhantomJS driver with a desktop Chrome user agent.

        :return: the new driver, with its window set to 1024x768.
        """
        user_agent = (
            "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
        )
        caps = dict(DesiredCapabilities.PHANTOMJS)
        caps.update({"phantomjs.page.settings.userAgent": user_agent})

        phantom = webdriver.PhantomJS(desired_capabilities=caps)
        phantom.set_window_size(1024, 768)
        return phantom
LoginController.py 文件源码 项目:weiboCrawler 作者: hjydzh 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_browser():
    """Create a PhantomJS browser with custom request headers and user agent.

    The headers are set on a private copy of the PhantomJS capabilities.
    Previously they were written to the shared ``DesiredCapabilities.PHANTOMJS``
    dict after the copy had been taken, so the first browser was created
    without them and the shared dict was modified for every other user.

    :return: the new browser, with its window set to 1920x1080.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap['phantomjs.page.customHeaders.Accept-Language'] = 'zh-CN,zh;q=0.8'
    dcap['phantomjs.page.customHeaders.Connection'] = 'keep-alive'
    dcap['phantomjs.page.customHeaders.Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    dcap['phantomjs.page.customHeaders.Accept-Encoding'] = 'gzip, deflate, sdch'
    dcap['phantomjs.page.customHeaders.Cache-Control'] = 'max-age=0'
    dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36")
    # NOTE(review): machine-specific location of phantomjs.exe.
    phantomjs_path = "G:\\programeSoftwares\\python2.7\\Scripts\\phantomjs.exe"
    browser = webdriver.PhantomJS(desired_capabilities=dcap,executable_path=phantomjs_path)
    browser.set_window_size(1920, 1080)
    return browser
webdriver.py 文件源码 项目:ShuoshuoMonitor 作者: aploium 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch a PhantomJS / Ghostdriver service and open a driver session on it.

        The service is started first; the remote-driver base class is then
        initialised against the service URL.  If that initialisation raises,
        ``self.quit()`` is called before the exception is re-raised.

        :Args:
         - executable_path - path of the PhantomJS executable, handed to ``Service``.
         - port - port handed to ``Service``.
         - desired_capabilities - capabilities dictionary forwarded to
           ``RemoteWebDriver.__init__``.
         - service_args - forwarded to ``Service`` as ``service_args``.
         - service_log_path - forwarded to ``Service`` as ``log_path``.
        """
        phantom_service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path,
        )
        self.service = phantom_service
        phantom_service.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=phantom_service.service_url,
                desired_capabilities=desired_capabilities,
            )
        except BaseException:
            # Clean up after a failed session start, then let the error through.
            self.quit()
            raise

        self._is_remote = False
BrowserPhantomjs.py 文件源码 项目:jtyd_python_spider 作者: xtuyaowu 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def visit(self, url, xpath=None, timeout=60, retry=1, load_images=False, **kwargs):
        """Open ``url`` in a fresh PhantomJS browser and return the page source.

        Any browser left over from a previous call is quit first.  The new
        browser is kept in ``self.browser`` after the call.

        :param url: address to load.
        :param xpath: when given, the browser waits (implicitly, up to
            ``timeout`` seconds) for an element matching it; a miss is printed
            and otherwise ignored.
        :param timeout: implicit wait in seconds, used only together with ``xpath``.
        :param retry: number of additional load attempts after a failed one.
        :param load_images: images are disabled unless this is true.
        :param kwargs: accepted and ignored.
        :return: the page source, or ``None`` when the browser could not be
            started or the page is the empty PhantomJS document.
        """
        if self.browser:
            self.browser.quit()
            # Do not keep a reference to a browser that has been shut down.
            self.browser = None
        # Private copy: the shared DesiredCapabilities.PHANTOMJS dict stays untouched.
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities['phantomjs.page.settings.userAgent'] = self.ua if self.ua else 'Mozilla/5.0 (Windows NT 6.1; rv:42.0) Gecko/20100101 Firefox/42.0'
        service_args = list()
        if not load_images:
            service_args += ['--load-images=false']
        if self.proxy:
            service_args += ['--proxy=%s' % self.proxy]
        try:
            browser = webdriver.PhantomJS(service_args=service_args if service_args else None,
                                          desired_capabilities=capabilities)
        except Exception as e:
            # print(...) with one argument behaves the same on Python 2 and 3.
            print(str(e))
            return None
        count = 0
        while (retry + 1) > count:
            count += 1
            try:
                browser.get(url)
                break
            except Exception as e:
                print(str(e))
        if xpath:
            browser.implicitly_wait(timeout)
            try:
                browser.find_element_by_xpath(xpath)
            except Exception as e:
                print(str(e))
        self.browser = browser
        result = browser.page_source
        return result if result != '<html><head></head><body></body></html>' else None
browser_server.py 文件源码 项目:jtyd_python_spider 作者: xtuyaowu 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def get(self, url, xpath, timeout, retry, service_args, desired_capabilities):
        """Load ``url`` in a one-shot PhantomJS browser and return page text and cookies.

        :param url: address to load.
        :param xpath: when set, the browser waits (implicitly, up to
            ``timeout`` seconds) for a matching element; a miss is printed
            and otherwise ignored.
        :param timeout: implicit wait in seconds, used only together with ``xpath``.
        :param retry: number of additional load attempts after a failed one.
        :param service_args: JSON-encoded list of PhantomJS switches, or a false value.
        :param desired_capabilities: JSON-encoded dict of extra capabilities,
            or a false value.
        :return: a JSON string with the keys ``cookies`` and ``text``, or
            ``''`` on any error or when the page is the empty PhantomJS document.
        """
        browser = None
        try:
            result = dict()
            # Per-call copy: merging into the shared dict made one request's
            # capabilities leak into every later request.
            capabilities = dict(DesiredCapabilities.PHANTOMJS)
            if desired_capabilities:
                capabilities.update(json.loads(desired_capabilities))
            browser = webdriver.PhantomJS(service_args=json.loads(service_args) if service_args else None,
                                          desired_capabilities=capabilities)
            count = 0
            while (retry + 1) > count:
                count += 1
                try:
                    browser.get(url)
                    break
                except Exception as e:
                    # print(...) with one argument behaves the same on Python 2 and 3.
                    print(str(e))
            if xpath:
                browser.implicitly_wait(timeout)
                try:
                    browser.find_element_by_xpath(xpath)
                except Exception as e:
                    print(str(e))
            text = browser.page_source
            if text == '<html><head></head><body></body></html>':
                return ''
            result['cookies'] = browser.get_cookies()
            # json.dumps escapes non-ASCII itself, so the output equals that of
            # the former utf-8 encoded value on Python 2 and also works on Python 3.
            result['text'] = text
            return json.dumps(result)
        except Exception as e:
            print(str(e))
            return ''
        finally:
            # Single shutdown point for every exit path.
            if browser:
                browser.quit()
html_downloader.py 文件源码 项目:wechat_spider 作者: CoolWell 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def download_articles_ph(self, url):
        '''
        Download a page with PhantomJS and return its rendered HTML.

        Failures are reported by printing the current time, the url and the
        error; they are not raised.

        :param url: address of the page to load
        :return: the page source, or None when url is None, the browser could
            not be started, or loading the page failed
        '''
        if url is None:
            return None
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            UA
        )
        dcap["takesScreenshot"] = (False)
        try:
            driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no'])
        except Exception as e:
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None

        try:
            driver.set_page_load_timeout(30)
            driver.get(url)
            # Short pause after loading before the source is read.
            time.sleep(1)
            return driver.page_source
        except Exception as e:
            # Was a bare except that also caught KeyboardInterrupt and hid the error.
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None
        finally:
            driver.quit()
html_downloader.py 文件源码 项目:wechat_spider 作者: CoolWell 项目源码 文件源码 阅读 44 收藏 0 点赞 0 评论 0
def maintain_cookies_ph(self):
        """Collect five cookie sets by visiting weixin.sogou.com with PhantomJS.

        A new browser is started for every visit and is always shut down
        again, including when the visit raises.

        :return: list with one ``get_cookies()`` result per visit.
        """
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = UA
        cookie = []
        for _ in range(5):
            driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no', ])
            try:
                driver.get("http://weixin.sogou.com/")
                cookie.append(driver.get_cookies())
            finally:
                # Previously skipped when get() raised, leaking the process.
                driver.quit()
        return cookie
snapper.py 文件源码 项目:Snapper 作者: dxa4481 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def host_worker(hostQueue, fileQueue, timeout, user_agent, verbose):
    """Take hosts from ``hostQueue`` and screenshot each with one PhantomJS browser.

    A host without an ``http://`` or ``https://`` prefix is tried with both
    schemes, in that order.  For every URL that ``host_reachable`` accepts
    and ``save_image`` captures, ``{url: filename}`` is put on ``fileQueue``.
    The browser is shut down when the worker finishes, also on error.

    :param hostQueue: queue of host strings to process.
    :param fileQueue: queue receiving ``{url: filename}`` results.
    :param timeout: page-load timeout, also passed to ``host_reachable``.
    :param user_agent: user agent string for the browser.
    :param verbose: when true, progress and failures are printed.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = user_agent
    dcap["accept_untrusted_certs"] = True
    driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true'], desired_capabilities=dcap) # or add to your PATH
    try:
        driver.set_window_size(1024, 768) # optional
        driver.set_page_load_timeout(timeout)
        # NOTE(review): with several consumers, empty() followed by a blocking
        # get() can wait forever; confirm how workers are scheduled.
        while not hostQueue.empty():
            host = hostQueue.get()
            if host.startswith(("http://", "https://")):
                targets = [host]
            else:
                targets = ["http://" + host, "https://" + host]
            for target in targets:
                filename = os.path.join("output", "images", str(uuid4()) + ".png")
                if verbose:
                    print("Fetching %s" % target)
                if host_reachable(target, timeout) and save_image(target, filename, driver):
                    fileQueue.put({target: filename})
                elif verbose:
                    print("%s is unreachable or timed out" % target)
    finally:
        # The browser used to be left running when the worker returned.
        driver.quit()
phantomjs.py 文件源码 项目:trackship 作者: nabeelio 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, user_agent=None, cookies_file=None):
        """
        Initialize the phantom JS selenium driver

        Stores the module-level ``config`` as ``self.conf``, creates the
        driver from ``conf['general']['phantomjs']``, calls
        ``self.load_cookies()`` and then sets a 30 second implicit wait and a
        1024x768 window.

        :param user_agent: user agent applied to the browser when set
        :param cookies_file: stored unchanged as ``self.cookies_file``
        :return:
        """
        self.conf = config
        self.user_agent = user_agent
        self.cookies_file = cookies_file

        # http://phantomjs.org/api/webpage/property/settings.html
        caps = dict(DesiredCapabilities.PHANTOMJS)
        caps.update({
            'phantomjs.page.settings.loadImages': False,
            'phantomjs.page.settings.webSecurityEnabled': False,
            'phantomjs.page.settings.localToRemoteUrlAccessEnabled': True,
        })
        if user_agent:
            caps['phantomjs.page.settings.userAgent'] = user_agent

        phantom_binary = self.conf['general']['phantomjs']
        self.driver = webdriver.PhantomJS(
            desired_capabilities=caps,
            executable_path=phantom_binary,
        )

        self.load_cookies()

        self.driver.implicitly_wait(30)
        self.driver.set_window_size(1024, 768)


问题


面经


文章

微信
公众号

扫码关注公众号