def crawlListPage(self):
    """Walk the Tuniu hotel list pages for ``self._city`` and parse each one.

    Opens the list URL with check-in set to tomorrow and check-out set to
    the day after, reads the page count from the pager, then repeatedly
    scrolls the page, passes the page source to ``self.__parseUrls`` once
    per page, and clicks the pager's "next" control.

    Returns:
        bool: ``True`` if the remaining-page counter is 1 or less when the
        loop ends, ``False`` if the loop gave up earlier because one page
        needed ``max_attempts`` consecutive attempts without a successful
        click on "next".
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the nesting below was rebuilt from the statements
    # themselves and should be checked against the original file.

    # Consecutive attempts allowed on a single page before giving up, and
    # the pause (seconds) between those attempts.
    max_attempts = 15
    retry_delay = 3

    # NOTE(review): this literal looks mis-encoded (the original text was
    # lost); it is presumably the site's "next page" label. Restore the
    # real text from the original source, otherwise the check only matches
    # pages that literally contain "???".
    next_marker = u"???"

    # Query a one-night stay starting tomorrow.
    check_in = datetime.datetime.now() + datetime.timedelta(days=1)
    check_out = check_in + datetime.timedelta(days=1)
    self.openPage(
        "http://hotel.tuniu.com/list/"
        + self._city
        + "p0s0b0"
        + "?checkindate="
        + check_in.strftime('%Y-%m-%d')
        + "&checkoutdate="
        + check_out.strftime('%Y-%m-%d')
    )

    # Attempts spent on the current page; reset to 0 after a successful
    # click on "next".
    attempts = 0
    # Whether the current page has already been handed to __parseUrls.
    parsed = False
    # Number of pages left, taken from the last pager entry.
    page_num = int(self.driver.find_element_by_xpath("//span[@class='page-num'][last()]/a").text)

    while page_num >= 1:
        attempts += 1

        # Jump to the bottom of the page and then up one screen.
        for key in (Keys.END, Keys.PAGE_UP):
            self.driver.find_element_by_tag_name("body").send_keys(key)

        # Parse the page at most once, and only when the marker is present.
        if next_marker in self.driver.page_source and not parsed:
            pageData = self.driver.execute_script('return pageData')
            # Debug output. NOTE(review): this raises if pageData['list']
            # is empty or the keys are missing; it is outside the try
            # below, so such an error propagates to the caller.
            print(pageData['list'][0]['pos'])
            self.__parseUrls(self.driver.page_source)
            print(u"???????%d" % len(self.listPageInfo))
            parsed = True

        # Try to advance to the next page.
        try:
            if next_marker in self.driver.page_source:
                self.driver.find_element_by_xpath("//div[@class='fr page-jump']/span[@class='next']").click()
                page_num -= 1
                # The new page has not been parsed yet and starts with a
                # fresh attempt budget.
                parsed = False
                attempts = 0
                # Randomised pause between page turns.
                time.sleep(random.uniform(3, 6))
                print(page_num)
        except Exception as e:
            # Best effort: report and fall through to the retry logic.
            print("error happen at clicking next-page")
            print(e)
            # For debugging, a screenshot can be captured here:
            # self.driver.save_screenshot('%s.png' % page_num)

        # attempts == 0 means the click succeeded; go on to the next page.
        if attempts == 0:
            continue
        # Otherwise this page is still pending: give up once the budget is
        # used, else wait and retry.
        if attempts >= max_attempts:
            break
        time.sleep(retry_delay)

    return page_num <= 1
评论列表
文章目录