def crawlListPage(self):
    """Page through the hotel listing and parse each result page once.

    Opens the Nanjing listing, reads the total hotel count shown on the
    page and derives the number of result pages from it (the divisor of 20
    is taken to be the page size).  For every page the method scrolls to
    the bottom, hands the page source to ``self.__parseUrls``, waits until
    the ``_loading_`` overlay is hidden and then clicks the next-page link.

    The loop gives up after 15 consecutive iterations in which no page
    turn happened (one second apart).

    Returns:
        bool: ``True`` when at most one page was still pending when the
        loop ended, ``False`` otherwise.
    """
    hotels_per_page = 20
    max_idle_loops = 15      # consecutive iterations without a page turn
    loading_timeout = 30.0   # seconds to wait for the loading overlay

    # NOTE(review): the '?' runs in the literals of this method look like
    # non-ASCII text lost to an encoding error; they are kept byte-for-byte
    # and should be restored from the original file.
    print('???????')
    self.openPage(
        "http://hotel.elong.com/nanjing/"
    )

    hotel_num = int(self.driver.find_element_by_xpath("//span[@class='t24 mr5']").text)
    # Ceiling division; `//` keeps the result an int on Python 2 and 3.
    page_num = (hotel_num + hotels_per_page - 1) // hotels_per_page

    # Iterations spent on the current page; reset to 0 after a page turn.
    loop_num = 0
    # Guards against parsing the same page again while retrying.
    page_parsed = False

    while page_num >= 1:
        loop_num += 1
        # Scroll to the bottom of the page before inspecting its source.
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)

        if u"???" in self.driver.page_source:
            if not page_parsed:
                # presumably fills self.listPageInfo -- verify in __parseUrls
                self.__parseUrls(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                page_parsed = True
            try:
                # Poll every 0.1s until the overlay is absent or its style
                # contains 'none'.  The deadline turns a stuck overlay into
                # an error handled by the retry logic below instead of an
                # endless loop.
                deadline = time.time() + loading_timeout
                while True:
                    response = HtmlResponse(url="My HTML String", body=self.driver.page_source, encoding="utf-8")
                    overlay_style = response.xpath("//div[@id='_loading_']/@style").extract()
                    if not overlay_style or u'none' in overlay_style[0]:
                        break
                    if time.time() >= deadline:
                        raise RuntimeError(
                            "loading overlay still visible after %.0fs" % loading_timeout
                        )
                    time.sleep(0.1)

                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_xpath("//div[@class='paging1']/a[@class='page_next']").click()
                    # Only count the page as done once the click was issued.
                    page_num -= 1
                    page_parsed = False
                    loop_num = 0
                    # Random pause between page turns.
                    time.sleep(random.uniform(1, 3))
            except Exception as e:
                # Deliberately broad: any failure here is retried by the
                # idle-loop logic below rather than aborting the crawl.
                print("error happen at clicking next-page")
                print(e)

        if loop_num == 0:
            # A page turn just happened; move straight on to the next page.
            continue
        if loop_num >= max_idle_loops:
            break
        time.sleep(1)

    return page_num <= 1
评论列表
文章目录