geetestcrack.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:fintech_spider 作者: hee0624 项目源码 文件源码
def __init__(self,
                 url="http://www.gsxt.gov.cn/index.html",
                 search_text=u"????????????",
                 input_id='keyword',
                 search_element_id='btn_query',
                 gt_element_class_name='gt_box',
                 gt_slider_knob_name='gt_slider_knob',
                 result_numbers_xpath='/html/body/div[2]/div[3]/div[1]/span',
                 result_list_verify_id=None,
                 result_list_verify_class=None,
                 is_gap_every_broad=True,
                 chromedriver_path=r"/home/lxw/Software/chromedirver_selenium/chromedriver"):
        """Store the crawl configuration and start a Chrome WebDriver session.

        Every argument except ``chromedriver_path`` is saved unchanged on the
        instance under the same name. The original parameter descriptions
        were lost to an encoding error (only ``?`` placeholders survived), so
        the meanings below are inferred from the parameter names and default
        values and should be confirmed against the methods that read them.

        Args:
            url: Presumably the address of the page to open.
            search_text: Presumably the query text to submit.
                NOTE(review): the default literal looks encoding-damaged
                (a run of ``?``); it is kept byte-for-byte because the
                intended text cannot be recovered from this file.
            input_id: Presumably the ``id`` of the search input element.
            search_element_id: Presumably the ``id`` of the element that
                triggers the search.
            gt_element_class_name: Presumably the CSS class of the captcha
                image container.
            gt_slider_knob_name: Presumably the CSS class of the captcha
                slider knob.
            result_numbers_xpath: Presumably an XPath to the element showing
                the number of search results.
            result_list_verify_id: Presumably the ``id`` of an element used
                to verify that the result list has loaded; alternative to
                ``result_list_verify_class``.
            result_list_verify_class: Presumably the CSS class of an element
                used to verify that the result list has loaded.
            is_gap_every_broad: Boolean flag, ``True`` by default; its
                effect is not visible in this method -- TODO confirm.
            chromedriver_path: Filesystem path of the chromedriver
                executable handed to ``webdriver.Chrome``. Defaults to the
                path that used to be hard-coded here, so existing callers
                behave exactly as before.
        """
        self.url = url
        self.search_text = search_text
        self.input_id = input_id
        self.search_element_id = search_element_id
        self.gt_element_class_name = gt_element_class_name
        self.gt_slider_knob_name = gt_slider_knob_name
        self.result_numbers_xpath = result_numbers_xpath
        self.result_list_verify_id = result_list_verify_id
        self.result_list_verify_class = result_list_verify_class
        self.is_gap_every_broad = is_gap_every_broad

        # The driver location is a parameter so the class is usable on
        # machines other than the original author's.
        self.driver = webdriver.Chrome(chromedriver_path)

        # Random 2-3 second pause right after the browser is launched;
        # presumably gives it time to start before the first command.
        time.sleep(random.uniform(2.0, 3.0))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号