python类PAGE_UP的实例源码-面圈网

DriveServices.py 文件源码项目：ugc.aggregator 作者: Dreamcatcher-GIS 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def crawlListPage(self):
        """Page through the Ctrip Nanjing hotel list, parsing each page once.

        Opens the list URL, then loops: scroll to the bottom of the page, hand
        the page source to ``self.__crawllianjie`` (once per page) and click
        the next-page link.  When a page is not ready, the same page is polled
        again every 3 seconds; after 15 consecutive polls without advancing
        the crawl gives up.

        Returns:
            bool: True if the page countdown (which starts at 140 and drops by
            one per successful next-page click) reached 1 or less, False if
            the crawl gave up earlier.
        """
        self.openPage("http://hotels.ctrip.com/hotel/nanjing12#ctm_ref=hod_hp_sb_lst")
        self.driver.implicitly_wait(10)
        # Consecutive polls spent on the current page without advancing.
        loopNum = 0
        # False until the current page's source has been passed to the parser.
        ifHandle = False
        # Remaining pages; hard-coded upper bound, decremented per click.
        pageNum = 140
        while pageNum >= 1:
            loopNum += 1
            # Jump to the bottom, then one page up (presumably so lazily
            # loaded content and the pager get rendered -- TODO confirm).
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
            self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
            # NOTE(review): the u"??" / u"???" literals below look like
            # mis-encoded text (probably Chinese page labels such as the
            # next-page link text) -- restore them from the original source.
            if u"??" in self.driver.page_source:
                # Parse each page only once, even if we poll it repeatedly.
                if not ifHandle:
                    self.__crawllianjie(self.driver.page_source)
                    print(u"???????%d" % len(self.listPageInfo))
                    ifHandle = True
                try:
                    if u"???" in self.driver.page_source:
                        self.driver.find_element_by_partial_link_text(u"???").click()
                        pageNum -= 1
                        # New page: needs parsing, and the poll counter restarts.
                        ifHandle = False
                        loopNum = 0
                        # Randomised pause between page turns.
                        time.sleep(random.uniform(3, 6))
                        print(u"???" + str(pageNum))
                except Exception as e:
                    # Catch Exception rather than everything so Ctrl-C and
                    # SystemExit still stop the crawl; the failed click is
                    # retried by the polling logic below.
                    print("error happen at clicking of nextpage")
                    # repr() keeps the message printable even when it
                    # contains non-ASCII text.
                    print(repr(e))
            # loopNum is 0 only right after a successful page turn; otherwise
            # wait and poll the same page again, up to 15 times.
            if loopNum != 0:
                if loopNum >= 15:
                    break
                time.sleep(3)
        return pageNum <= 1

    # ??????

TuniuService.py 文件源码项目：ugc.aggregator 作者: Dreamcatcher-GIS 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def crawlListPage(self):
        """Page through the Tuniu hotel list for ``self._city``.

        Opens the list URL with check-in tomorrow and check-out the day after,
        reads the total page count from the pager, then loops: scroll to the
        bottom of the page, hand the page source to ``self.__parseUrls`` (once
        per page) and click the next-page control.  When a page is not ready,
        it is polled again every 3 seconds; after 15 consecutive polls without
        advancing the crawl gives up.

        Returns:
            bool: True if the page countdown reached 1 or less, False if the
            crawl gave up earlier.
        """
        # Query window: one night starting tomorrow.
        tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
        after_tomorrow = tomorrow + datetime.timedelta(days=1)
        self.openPage(
            "http://hotel.tuniu.com/list/"
            + self._city
            + "p0s0b0"
            + "?checkindate="
            + tomorrow.strftime('%Y-%m-%d')
            + "&checkoutdate="
            + after_tomorrow.strftime('%Y-%m-%d')
        )
        # Consecutive polls spent on the current page without advancing.
        loop_num = 0
        # False until the current page's source has been passed to the parser.
        if_handle = False
        # Remaining pages, taken from the last page-number entry of the pager.
        page_num = int(self.driver.find_element_by_xpath("//span[@class='page-num'][last()]/a").text)
        while page_num >= 1:
            loop_num += 1
            # Jump to the bottom, then one page up (presumably so the pager
            # gets rendered -- TODO confirm).
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
            self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
            # NOTE(review): the u"???" literals look like mis-encoded text
            # (probably the Chinese next-page label) -- restore them from the
            # original source.
            if u"???" in self.driver.page_source:
                # Parse each page only once, even if we poll it repeatedly.
                if not if_handle:
                    pageData = self.driver.execute_script('return pageData')
                    # Diagnostic output only: a missing or empty list must not
                    # abort the crawl before the page has been parsed.
                    try:
                        print(pageData['list'][0]['pos'])
                    except (KeyError, IndexError, TypeError) as e:
                        print("unexpected pageData shape: %r" % (e,))
                    self.__parseUrls(self.driver.page_source)
                    print(u"???????%d" % len(self.listPageInfo))
                    if_handle = True
                try:
                    if u"???" in self.driver.page_source:
                        self.driver.find_element_by_xpath("//div[@class='fr page-jump']/span[@class='next']").click()
                        page_num -= 1
                        # New page: needs parsing, and the poll counter restarts.
                        if_handle = False
                        loop_num = 0
                        # Randomised pause between page turns.
                        time.sleep(random.uniform(3, 6))
                        print(page_num)
                except Exception as e:
                    # The failed click is retried by the polling logic below.
                    print("error happen at clicking next-page")
                    print(e)
            # loop_num is 0 only right after a successful page turn; otherwise
            # wait and poll the same page again, up to 15 times.
            if loop_num != 0:
                if loop_num >= 15:
                    break
                time.sleep(3)
        return page_num <= 1

ElongService.py 文件源码项目：ugc.aggregator 作者: Dreamcatcher-GIS 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def crawlListPage(self):
        """Page through the Elong Nanjing hotel list, parsing each page once.

        Opens the list URL, derives the page count from the hotel total shown
        on the page (20 per page, rounded up), then loops: scroll to the
        bottom, hand the page source to ``self.__parseUrls`` (once per page),
        wait for the ``_loading_`` overlay to hide and click the next-page
        link.  When a page is not ready, it is polled again every second;
        after 15 consecutive polls without advancing the crawl gives up.

        Returns:
            bool: True if the page countdown reached 1 or less, False if the
            crawl gave up earlier.
        """
        print('???????')
        self.openPage(
            "http://hotel.elong.com/nanjing/"
        )
        # Consecutive polls spent on the current page without advancing.
        loop_num = 0
        # False until the current page's source has been passed to the parser.
        if_handle = False

        # Page count = ceil(hotel_num / 20); '//' keeps this an integer on
        # both Python 2 and Python 3.
        hotel_num = int(self.driver.find_element_by_xpath("//span[@class='t24 mr5']").text)
        page_num = (hotel_num + 19) // 20

        # Longest time (seconds) to wait for the loading overlay per attempt;
        # without a bound a stuck overlay would hang the crawl forever.
        loading_timeout = 30

        while page_num >= 1:
            loop_num += 1
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
            # NOTE(review): the u"???" literals look like mis-encoded text
            # (probably the Chinese next-page label) -- restore them from the
            # original source.
            if u"???" in self.driver.page_source:
                # Parse each page only once, even if we poll it repeatedly.
                if not if_handle:
                    self.__parseUrls(self.driver.page_source)
                    print(u"???????%d" % len(self.listPageInfo))
                    if_handle = True
                try:
                    # Poll every 0.1s until the overlay is absent or its
                    # style contains 'none', or until the deadline passes.
                    deadline = time.time() + loading_timeout
                    still_loading = True
                    while True:
                        response = HtmlResponse(url="My HTML String", body=self.driver.page_source, encoding="utf-8")
                        _loading = response.xpath("//div[@id='_loading_']/@style").extract()
                        if not _loading or u'none' in _loading[0]:
                            still_loading = False
                            break
                        if time.time() >= deadline:
                            break
                        time.sleep(0.1)
                    if still_loading:
                        # Skip the click; the polling logic below retries.
                        print("loading overlay still visible after %d seconds" % loading_timeout)
                    elif u"???" in self.driver.page_source:
                        self.driver.find_element_by_xpath("//div[@class='paging1']/a[@class='page_next']").click()
                        page_num -= 1
                        # New page: needs parsing, and the poll counter restarts.
                        if_handle = False
                        loop_num = 0
                        # Randomised pause between page turns.
                        time.sleep(random.uniform(1, 3))
                except Exception as e:
                    # The failed click is retried by the polling logic below.
                    print("error happen at clicking next-page")
                    print(e)

            # loop_num is 0 only right after a successful page turn; otherwise
            # wait and poll the same page again, up to 15 times.
            if loop_num != 0:
                if loop_num >= 15:
                    break
                time.sleep(1)
        return page_num <= 1