def crawlListPage(self):
    """Walk the Ctrip Nanjing hotel listing page by page and parse each page.

    The listing is opened in ``self.driver``.  Each loop iteration scrolls
    the page (END, then PAGE_UP), and once a marker text is present in the
    page source the page is handed to ``self.__crawllianjie`` exactly once.
    The "next page" link is then clicked and the remaining-page counter is
    decremented.

    A page that does not advance is retried with a 3 second pause; after 15
    consecutive iterations without a successful click the loop gives up.

    Returns:
        bool: True if the remaining-page counter reached 1 or lower when the
        loop ended, False if the loop gave up earlier.
    """
    self.openPage("http://hotels.ctrip.com/hotel/nanjing12#ctm_ref=hod_hp_sb_lst")
    self.driver.implicitly_wait(10)

    # Iterations spent on the current page since the last successful click.
    loop_num = 0
    # Whether the current page has already been passed to the parser.
    if_handle = False
    # Hard-coded number of result pages to walk.
    # NOTE(review): presumably the listing's page count when this was
    # written -- confirm, or read it from the page.
    page_num = 140

    while page_num >= 1:
        loop_num += 1

        # Scroll to the bottom and back up one screen.
        # NOTE(review): presumably this triggers lazy loading of the list
        # and brings the pager into view -- confirm.
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)

        # NOTE(review): the non-ASCII text of the marker literals below was
        # lost (only '?' remains in this copy); restore them from the
        # original file before running.
        if u"??" in self.driver.page_source:
            # Parse each page only once, even if we loop on it repeatedly.
            if not if_handle:
                self.__crawllianjie(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                if_handle = True

            # Try to move on to the next page.
            try:
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_partial_link_text(u"???").click()
                    page_num -= 1
                    # New page: not parsed yet, retry counter starts over.
                    if_handle = False
                    loop_num = 0
                    time.sleep(random.uniform(3, 6))
                    print(u"???" + str(page_num))
            except Exception as e:
                # Best effort: report the failure and let the retry logic
                # below decide.  Unlike a bare ``except``, this no longer
                # swallows KeyboardInterrupt / SystemExit.
                print("error happen at clicking of nextpage")
                print(e)

        # loop_num == 0 means we just advanced: go straight to the new page.
        # Otherwise pause and retry, giving up after 15 attempts on one page.
        if loop_num >= 15:
            break
        if loop_num:
            time.sleep(3)

    return page_num <= 1
# ??????
python类PAGE_UP的实例源码
def crawlListPage(self):
    """Walk the Tuniu hotel listing for ``self._city`` and parse each page.

    The listing is opened with check-in set to tomorrow and check-out to the
    day after.  The remaining-page counter is read from the pager, then each
    loop iteration scrolls the page (END, then PAGE_UP), parses it once via
    ``self.__parseUrls`` when a marker text is present in the page source,
    and clicks the "next" control.

    A page that does not advance is retried with a 3 second pause; after 15
    consecutive iterations without a successful click the loop gives up.

    Returns:
        bool: True if the remaining-page counter reached 1 or lower when the
        loop ended, False if the loop gave up earlier.
    """
    one_day = datetime.timedelta(days=1)
    check_in = datetime.datetime.now() + one_day
    check_out = check_in + one_day
    self.openPage(
        "http://hotel.tuniu.com/list/"
        + self._city
        + "p0s0b0"
        + "?checkindate="
        + check_in.strftime('%Y-%m-%d')
        + "&checkoutdate="
        + check_out.strftime('%Y-%m-%d')
    )

    # Iterations spent on the current page since the last successful click.
    loop_num = 0
    # Whether the current page has already been passed to the parser.
    if_handle = False
    # Number shown in the last page-number link of the pager.
    # NOTE(review): presumably the total page count -- confirm.
    page_num = int(
        self.driver.find_element_by_xpath("//span[@class='page-num'][last()]/a").text
    )

    while page_num >= 1:
        loop_num += 1

        # Scroll to the bottom and back up one screen.
        # NOTE(review): presumably this keeps the "next" control clickable
        # -- confirm.
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)

        # NOTE(review): the non-ASCII text of the marker literals below was
        # lost (only '?' remains in this copy); restore them from the
        # original file before running.
        if u"???" in self.driver.page_source:
            # Parse each page only once, even if we loop on it repeatedly.
            if not if_handle:
                # Debug output: 'pos' of the first entry in the page's JS
                # ``pageData`` object.  This runs outside the try below, so
                # a failure here propagates to the caller.
                pageData = self.driver.execute_script('return pageData')
                print(pageData['list'][0]['pos'])
                self.__parseUrls(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                if_handle = True

            # Try to move on to the next page.
            try:
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_xpath(
                        "//div[@class='fr page-jump']/span[@class='next']"
                    ).click()
                    page_num -= 1
                    # New page: not parsed yet, retry counter starts over.
                    if_handle = False
                    loop_num = 0
                    time.sleep(random.uniform(3, 6))
                    print(page_num)
            except Exception as e:
                # Best effort: report the failure and let the retry logic
                # below decide.
                print("error happen at clicking next-page")
                print(e)

        # loop_num == 0 means we just advanced: go straight to the new page.
        # Otherwise pause and retry, giving up after 15 attempts on one page.
        if loop_num >= 15:
            break
        if loop_num:
            time.sleep(3)

    return page_num <= 1
def crawlListPage(self):
    """Walk the eLong Nanjing hotel listing page by page and parse each page.

    The number of pages is derived from the hotel count shown on the page,
    at 20 hotels per page.  Each loop iteration scrolls to the bottom,
    parses the page once via ``self.__parseUrls`` when a marker text is
    present in the page source, waits for the ``_loading_`` overlay to be
    hidden, and clicks the "next page" link.

    A page that does not advance is retried with a 1 second pause; after 15
    consecutive iterations without a successful click the loop gives up.

    Returns:
        bool: True if the remaining-page counter reached 1 or lower when the
        loop ended, False if the loop gave up earlier.
    """
    print('???????')
    self.openPage(
        "http://hotel.elong.com/nanjing/"
    )

    # Iterations spent on the current page since the last successful click.
    loop_num = 0
    # Whether the current page has already been passed to the parser.
    if_handle = False

    # Page count = ceil(hotel count / 20).  divmod keeps this an integer on
    # both Python 2 and Python 3 (plain "/" is float division on Python 3).
    hotel_num = int(self.driver.find_element_by_xpath("//span[@class='t24 mr5']").text)
    full_pages, leftover = divmod(hotel_num, 20)
    page_num = full_pages + 1 if leftover else full_pages

    while page_num >= 1:
        loop_num += 1
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)

        # NOTE(review): the non-ASCII text of the marker literals below was
        # lost (only '?' remains in this copy); restore them from the
        # original file before running.
        if u"???" in self.driver.page_source:
            # Parse each page only once, even if we loop on it repeatedly.
            if not if_handle:
                self.__parseUrls(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                if_handle = True

            try:
                # Poll every 0.1 s until the "_loading_" element is absent or
                # its style contains "none".
                # NOTE(review): this wait has no timeout; if the overlay
                # never hides, the crawl blocks here.
                while True:
                    response = HtmlResponse(
                        url="My HTML String",
                        body=self.driver.page_source,
                        encoding="utf-8",
                    )
                    loading_style = response.xpath(
                        "//div[@id='_loading_']/@style"
                    ).extract()
                    if not loading_style or u'none' in loading_style[0]:
                        break
                    time.sleep(0.1)

                # Try to move on to the next page.
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_xpath(
                        "//div[@class='paging1']/a[@class='page_next']"
                    ).click()
                    page_num -= 1
                    # New page: not parsed yet, retry counter starts over.
                    if_handle = False
                    loop_num = 0
                    time.sleep(random.uniform(1, 3))
            except Exception as e:
                # Best effort: report the failure and let the retry logic
                # below decide.
                print("error happen at clicking next-page")
                print(e)

        # loop_num == 0 means we just advanced: go straight to the new page.
        # Otherwise pause and retry, giving up after 15 attempts on one page.
        if loop_num >= 15:
            break
        if loop_num:
            time.sleep(1)

    return page_num <= 1