TuniuService.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:ugc.aggregator 作者: Dreamcatcher-GIS 项目源码 文件源码
def crawlListPage(self):
        """Page through the Tuniu hotel list for ``self._city`` and parse it.

        Opens the list URL for a one-night stay (check-in tomorrow, check-out
        the day after), reads the page count from the last ``page-num`` pager
        entry, then loops: scroll the page, wait for a marker text to appear
        in the page source, hand the source to ``self.__parseUrls`` once per
        page, and click the pager's ``next`` element.

        The loop gives up when the marker text has not allowed a page turn
        after 15 consecutive polls (3 seconds apart).

        Returns:
            bool: ``True`` when the remaining-page counter is 1 or lower at
            the time the loop ends, ``False`` when the loop gave up with more
            than one page still outstanding.

        NOTE(review): the marker literals below are mis-encoded in this copy
        of the source; presumably they are the pager's "next page" label --
        confirm against the original file. If the final page does not contain
        the marker it is never parsed -- confirm whether that is intended.
        """
        # Query window: check in tomorrow, check out one day later.
        tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
        after_tomorrow = tomorrow + datetime.timedelta(days=1)
        self.openPage(
            "http://hotel.tuniu.com/list/"
            + self._city
            + "p0s0b0"
            + "?checkindate="
            + tomorrow.strftime('%Y-%m-%d')
            + "&checkoutdate="
            + after_tomorrow.strftime('%Y-%m-%d')
        )
        # Consecutive polls spent on the current page (reset to 0 after a
        # successful page turn).
        loop_num = 0
        # True once the current page has been parsed, so repeated polls of the
        # same page do not parse it again.
        if_handle = False
        # Remaining-page counter, seeded from the last pager entry.
        page_num = int(self.driver.find_element_by_xpath("//span[@class='page-num'][last()]/a").text)
        while page_num >= 1:
            loop_num += 1
            # Scroll to the bottom and back up one screen before inspecting
            # the page source (presumably to trigger lazy loading -- confirm).
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
            self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
            # Only act once the marker text is present in the page source.
            if u"???" in self.driver.page_source:
                # Parse each page at most once.
                if not if_handle:
                    pageData = self.driver.execute_script('return pageData')
                    print(pageData['list'][0]['pos'])
                    self.__parseUrls(self.driver.page_source)
                    print(u"???????%d" % len(self.listPageInfo))
                    if_handle = True
                # Try to advance to the next page.
                try:
                    # The source is re-read here because parsing happened in
                    # between; the marker is checked again before clicking.
                    if u"???" in self.driver.page_source:
                        self.driver.find_element_by_xpath("//div[@class='fr page-jump']/span[@class='next']").click()
                        page_num -= 1
                        # New page: it still needs parsing, and the poll
                        # counter starts over.
                        if_handle = False
                        loop_num = 0
                        # Randomised pause between page turns.
                        time.sleep(random.uniform(3, 6))
                        print(page_num)
                except Exception as e:
                    # Best effort: report the failure and fall through to the
                    # retry logic below instead of aborting the crawl.
                    print("error happen at clicking next-page")
                    print(e)
            # loop_num is non-zero only when no page turn happened in this
            # iteration: wait and poll again, up to 15 polls in total.
            if loop_num != 0:
                if loop_num >= 15:
                    break
                time.sleep(3)
        return page_num <= 1
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号