ElongService.py 文件源码

python
阅读 16 收藏 0 点赞 0 评论 0

项目:ugc.aggregator 作者: Dreamcatcher-GIS 项目源码 文件源码
def crawlListPage(self):
        print '???????'
        self.openPage(
            "http://hotel.elong.com/nanjing/"
        )
        # ?????????(????0)
        loop_num = 0
        # ???????????False???????????
        if_handle = False

        # ????
        page_num = 0
        hotel_num = int(self.driver.find_element_by_xpath("//span[@class='t24 mr5']").text)
        if hotel_num % 20==0:
            page_num = hotel_num/20
        else:
            page_num = hotel_num/20 + 1

        # ?? ??5?
        #page_num = 5

        while page_num>=1:
            loop_num += 1
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
            #self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
            if u"???" in self.driver.page_source:
                if if_handle == False:
                    self.__parseUrls(self.driver.page_source)
                    print u"???????%d" % len(self.listPageInfo)
                    if_handle = True
                try:
                    #???????????????0.1s
                    response = HtmlResponse(url="My HTML String",body=self.driver.page_source,encoding="utf-8")
                    _loading = response.xpath("//div[@id='_loading_']/@style").extract()
                    while 1:
                        if _loading == []:
                            break
                        if u'none' in _loading[0]:
                            break
                        else:
                            #print '?????......'
                            time.sleep(0.1)
                            response = HtmlResponse(url="My HTML String",body=self.driver.page_source,encoding="utf-8")
                            _loading = response.xpath("//div[@id='_loading_']/@style").extract()
                    if u"???" in self.driver.page_source:
                        self.driver.find_element_by_xpath("//div[@class='paging1']/a[@class='page_next']").click()
                        page_num -= 1
                        if_handle = False
                        loop_num = 0
                        time.sleep(random.uniform(1, 3))
                except Exception, e:
                    print "error happen at clicking next-page"
                    print e

            if loop_num != 0:
                if loop_num < 15:
                    time.sleep(1)
                    continue
                else:
                    break
        return False if page_num > 1 else True
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号