def getpagenum(self, response):
    """Open the comment tab and pass the estimated page count on.

    Sends END to the page body, clicks the link inside ``li#commentTab``
    and waits three seconds.  The link text is then read from
    ``response``, stripped down to its digits and turned into a page
    count (digits // 15 + 1) that is handed to ``self.crawlcommentinfo``.

    The method is best-effort: any ordinary error raised along the way
    (element not found, no text node, no digits, ...) is ignored.
    Nothing is returned.
    """
    try:
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        self.driver.find_element_by_xpath("//li[@id='commentTab']/a").click()
        time.sleep(3)
        tab_text = response.xpath("//li[@id='commentTab']/a/text()").extract()[0]
        # Raw string: '\D' is a regex class, not a Python string escape.
        comment_count = int(re.sub(r'\D', '', tab_text))
        # Floor division keeps the page count an int on Python 2 and 3.
        comment_page_count = comment_count // 15 + 1
        self.crawlcommentinfo(comment_page_count)
    except Exception:
        # Deliberately best-effort.  `Exception` (rather than a bare
        # `except`) lets KeyboardInterrupt/SystemExit propagate.
        pass
# ????????
# Example source code for the Python class END (used below as selenium's Keys.END)
def scroll_and_click_by_partial_link_text(self, text, from_bottom=False):
    """Scroll key-by-key until a link containing ``text`` can be clicked.

    The page is first moved to the bottom (END) when ``from_bottom`` is
    true, otherwise to the top (HOME).  Each attempt then sends a single
    ARROW_UP / ARROW_DOWN key to the body and tries to click the link
    found by partial link text.  The loop stops on the first successful
    click.

    Every 500th attempt the page is refreshed, followed by a two second
    pause and a jump back to the starting edge.  On attempt 1501 a
    message with the current URL is printed and the method gives up.

    :param text: partial link text to look for.
    :param from_bottom: start at the bottom and scroll upwards.
    """
    if from_bottom:
        start_key, step_key = Keys.END, Keys.ARROW_UP
    else:
        start_key, step_key = Keys.HOME, Keys.ARROW_DOWN
    self.driver.find_element_by_tag_name("body").send_keys(start_key)

    attempt = 0
    while True:
        attempt += 1
        if attempt % 500 == 0:
            # Reload, then return to the starting edge.
            self.driver.refresh()
            time.sleep(2)
            self.driver.find_element_by_tag_name("body").send_keys(start_key)
        if attempt == 1501:
            print(u"????????" + self.driver.current_url)
            break
        self.driver.find_element_by_tag_name("body").send_keys(step_key)
        try:
            self.driver.find_element_by_partial_link_text(text).click()
            break
        except Exception:
            # Not found / not clickable yet: scroll one more step.
            # `Exception` keeps the loop interruptible with Ctrl+C.
            continue
def scroll_and_click_by_xpath(self, text, from_bottom=False, refresh_if_failed=True, sleep_time=0):
    """Scroll key-by-key until the element matching an XPath can be clicked.

    The page is first moved to the bottom (END) when ``from_bottom`` is
    true, otherwise to the top (HOME), followed by a ``sleep_time`` pause.
    Each attempt then sends a single ARROW_UP / ARROW_DOWN key to the body
    and tries to click the element located by ``text``.  The loop stops on
    the first successful click.

    Every 500th attempt, if ``refresh_if_failed`` is true, the page is
    refreshed and moved back to the starting edge, with a ``sleep_time``
    pause after each of the two steps.  On attempt 1501 a message with the
    current URL is printed and the method gives up.

    :param text: XPath expression of the element to click.
    :param from_bottom: start at the bottom and scroll upwards.
    :param refresh_if_failed: reload the page every 500 failed attempts.
    :param sleep_time: seconds to pause after jumps and refreshes.
    """
    if from_bottom:
        start_key, step_key = Keys.END, Keys.ARROW_UP
    else:
        start_key, step_key = Keys.HOME, Keys.ARROW_DOWN
    self.driver.find_element_by_tag_name("body").send_keys(start_key)
    time.sleep(sleep_time)

    attempt = 0
    while True:
        attempt += 1
        # NOTE(review): the copy this was restored from had lost its
        # indentation; the jump back to the starting edge is assumed to
        # belong to the refresh branch -- confirm against the original.
        if attempt % 500 == 0 and refresh_if_failed:
            self.driver.refresh()
            time.sleep(sleep_time)
            self.driver.find_element_by_tag_name("body").send_keys(start_key)
            time.sleep(sleep_time)
        if attempt == 1501:
            print(u"????????" + self.driver.current_url)
            break
        self.driver.find_element_by_tag_name("body").send_keys(step_key)
        try:
            self.driver.find_element_by_xpath(text).click()
            break
        except Exception:
            # Not found / not clickable yet: scroll one more step.
            # `Exception` keeps the loop interruptible with Ctrl+C.
            continue
def follow_from_recommended(browser, amount):
    """Follow up to ``amount`` accounts from Twitter's who-to-follow page.

    Opens https://twitter.com/who_to_follow and collects the follow
    buttons.  While fewer than ``amount`` buttons are present and the last
    scroll still produced new ones, the page is scrolled to the bottom and
    back to the top (two second pause after each key).  For each selected
    button, move / wait / click / wait / progress-print steps are added to
    one ``Actions`` chain, which is performed once at the end.

    :param browser: selenium webdriver instance.
    :param amount: maximum number of accounts to follow.
    :return: number of buttons put into the action chain.
    """
    button_xpath = '//div[@id = "timeline"]/div/div[@class = "stream"]/ol/li/div/div[@class = "follow-bar"]/div/span/button[1]'

    browser.get('https://twitter.com/who_to_follow')
    page_body = browser.find_element_by_tag_name('body')
    buttons = browser.find_elements_by_xpath(button_xpath)

    previous_count = 0
    # Stop when enough buttons are loaded or a scroll round adds nothing.
    while previous_count < len(buttons) < amount:
        previous_count = len(buttons)
        page_body.send_keys(Keys.END)
        sleep(2)
        page_body.send_keys(Keys.HOME)
        sleep(2)
        buttons = browser.find_elements_by_xpath(button_xpath)

    followed = min(len(buttons), amount)

    # NOTE(review): the source had lost its indentation; perform() is
    # assumed to run once after the loop -- confirm against the original.
    chain = Actions(browser)
    for position, follow_button in enumerate(buttons[:followed], 1):
        chain.move_to_element(follow_button)
        chain.wait(1)
        chain.click()
        chain.wait(1)
        chain.print_it(str(position) + '/' + str(followed))
    chain.perform()
    sleep(1)
    return followed
def unfollow_users(browser, amount):
    """Unfollow up to ``amount`` accounts from Twitter's following page.

    Opens https://twitter.com/following and collects the follow-state
    buttons inside the profile cards.  While fewer than ``amount`` buttons
    are present and the last scroll still produced new ones, the page is
    scrolled to the bottom and back to the top (one second pause after
    each key).  For each selected button, move / wait / move / click /
    wait / progress-print steps are added to one ``Actions`` chain, which
    is performed once at the end.

    :param browser: selenium webdriver instance.
    :param amount: maximum number of accounts to unfollow.
    :return: number of buttons put into the action chain.
    """
    button_xpath = '//div[@class = "ProfileCard-actions"]//span[contains(@class, "user-actions-follow-button js-follow-btn follow-button")]'

    browser.get('https://twitter.com/following')
    page_body = browser.find_element_by_tag_name('body')
    buttons = browser.find_elements_by_xpath(button_xpath)

    previous_count = 0
    # Stop when enough buttons are loaded or a scroll round adds nothing.
    while previous_count < len(buttons) < amount:
        previous_count = len(buttons)
        page_body.send_keys(Keys.END)
        sleep(1)
        page_body.send_keys(Keys.HOME)
        sleep(1)
        buttons = browser.find_elements_by_xpath(button_xpath)

    unfollowed = min(len(buttons), amount)

    # NOTE(review): the source had lost its indentation; perform() is
    # assumed to run once after the loop -- confirm against the original.
    chain = Actions(browser)
    for position, follow_button in enumerate(buttons[:unfollowed], 1):
        chain.move_to_element(follow_button)
        chain.wait(1)
        chain.move_to_element(follow_button)
        chain.click()
        chain.wait(1)
        chain.print_it(str(position) + '/' + str(unfollowed))
    chain.perform()
    return unfollowed
def search_Orgresource(self, skey, svalue):
    """Log in, open the quota sub-page and verify a search for ``svalue``.

    Steps: log in through ``self.admin_cloud_login`` with fixed
    credentials, click the 'quota' menu, click the sub-menu link (with
    one retry after re-clicking the menu), click the search-key
    ``<select>``, then, depending on ``skey``, type ``svalue`` into the
    search box, click search and assert that ``svalue`` appears in
    column 1 or column 2 of row ``tr1``.

    :param skey: search-key label that selects which branch runs.
    :param svalue: value typed into the search box and expected in the
        first result row.
    :raises AssertionError: if ``svalue`` is not in the checked cell.
    """
    browser = self.driver
    # NOTE(review): credentials are hard-coded in the test itself.
    self.admin_cloud_login(username='org231', password='111111', assertusername='?????')
    main_map = Main_Browser_UIMap(browser)
    time.sleep(2)
    main_map.get_menu('quota').click()
    quota_map = EditQuota_UIMap(browser)

    submenu_xpath = '//*[@id="product-nav"]/li[12]/ul/li[2]/a'
    select_xpath = '//*[@id="quota_form"]/div/select'

    try:
        quota_map.getelement(submenu_xpath).click()
    except Exception:
        # First click failed: click the menu again and retry once.
        main_map.get_menu('quota').click()
        quota_map.getelement(submenu_xpath).click()
    time.sleep(2)

    quota_map.getelement(select_xpath).click()
    time.sleep(3)

    # NOTE(review): both literals below read '?????' in this copy,
    # presumably because non-ASCII text was lost in encoding; the two
    # branches most likely compared against different labels -- restore
    # them from the original source.
    if skey == '?????':
        quota_map.getelement(id='search_key').send_keys(svalue)
        quota_map.getelement(id='search').click()
        assert svalue in quota_map.getelement('//*[@id="tr1"]/td[1]').text
    if skey == '?????':
        # Pick the last option of the select with the keyboard.
        quota_map.getelement(select_xpath).send_keys(Keys.END)
        quota_map.getelement(select_xpath).send_keys(Keys.ENTER)
        quota_map.getelement(id='search_key').send_keys(svalue)
        quota_map.getelement(id='search').click()
        assert svalue in quota_map.getelement('//*[@id="tr1"]/td[2]').text
def crawlUserWeibo(self, url=None, pageHandler=None, threshold=0.3):
    """Page through a user's weibo timeline, handing each page to a handler.

    The loop keeps sending END to the page body.  Once ``threshold``
    seconds have passed since the last check and the page source contains
    the pager marker text, the current page source is passed to
    ``pageHandler(page_source, pageNum)`` (once per page) and the pager
    link is clicked.  When the marker is missing a miss counter is
    increased; when that counter exceeds 10 the page is refreshed.

    The loop has no exit condition of its own: the method only ends when
    an exception propagates out of it.

    :param url: page to open; defaults to ``http://weibo.com/<id>`` built
        from ``self.userInfo["id"]``.
    :param pageHandler: callable taking ``(page_source, page_number)``;
        defaults to ``self.pageHandler_weibo``.
    :param threshold: minimum seconds between two pager checks.
    """
    if url is None:
        url = "http://weibo.com/" + str(self.userInfo["id"])
    if pageHandler is None:
        pageHandler = self.pageHandler_weibo

    self.driver.get(url)
    time.sleep(2)

    next_check_at = time.time() + threshold
    miss_count = 0
    page_number = 1
    page_handled = False

    # NOTE(review): the source had lost its indentation; the nesting
    # below is a reconstruction -- confirm against the original.
    while True:
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        if time.time() > next_check_at:
            if u"???" in self.driver.page_source:
                if not page_handled:
                    pageHandler(self.driver.page_source, page_number)
                    page_handled = True
                try:
                    self.driver.find_element_by_partial_link_text("???").click()
                    miss_count = 0
                    page_handled = False
                    page_number = page_number + 1
                    print("page:" + str(page_number))
                except Exception:
                    # Link not clickable yet: wait another interval.
                    # `Exception` lets Ctrl+C stop this endless loop.
                    next_check_at = time.time() + threshold
                    continue
            else:
                next_check_at = time.time() + threshold
                miss_count = miss_count + 1
                continue
        # Too many checks without the pager marker: reload the page.
        if miss_count > 10:
            miss_count = 0
            self.driver.refresh()
# Crawl the comment pages of a single weibo, addressed by user ID and weibo ID
def crawlWeiboContent(self, userID, weiboID, pageHandler=None, threshold=0.2):
    """Page through one weibo's comment pages, handing each to a handler.

    Opens ``http://weibo.com/<userID>/<weiboID>``, then derives the total
    page count from ``self.__getCommentNum(page_source)`` (count // 20 +
    1), retrying until that call succeeds.  The loop keeps sending END to
    the body.  Once ``threshold`` seconds have passed and the page source
    contains the pager marker text, the page is passed to
    ``pageHandler(page_source, page_number, userID, weiboID)`` (once per
    page) and the pager link is clicked.  On the last page the handler is
    called if it has not run yet and the loop ends.  After more than 20
    checks without the marker the page is refreshed and paging restarts
    from page 1.

    :param userID: user id string, used in the URL and passed on.
    :param weiboID: weibo id string, used in the URL and passed on.
    :param pageHandler: callable; defaults to ``self.pageHandler_comment``.
    :param threshold: minimum seconds between two pager checks.
    """
    if pageHandler is None:
        pageHandler = self.pageHandler_comment

    url = "http://weibo.com/" + userID + "/" + weiboID
    print(url)
    self.driver.get(url)
    time.sleep(1)

    # Retry until the comment count can be read from the page.
    while True:
        try:
            # Floor division keeps the page count an int on Python 2 and 3.
            totalPageNum = self.__getCommentNum(self.driver.page_source) // 20 + 1
            break
        except Exception:
            # `Exception` (not a bare except) so that Ctrl+C can still
            # stop this unbounded retry loop.
            continue

    next_check_at = time.time() + threshold
    currentPageNum = 1
    miss_count = 0
    page_handled = False

    # NOTE(review): the source had lost its indentation; the nesting
    # below (in particular the position of the final `break`) is a
    # reconstruction -- confirm against the original.
    while currentPageNum <= totalPageNum:
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        if time.time() > next_check_at:
            if u"???" in self.driver.page_source:
                # page_handled prevents passing the same page twice.
                if not page_handled:
                    pageHandler(self.driver.page_source, currentPageNum, userID, weiboID)
                    page_handled = True
                try:
                    self.driver.find_element_by_partial_link_text("???").click()
                    time.sleep(1)
                    miss_count = 0
                    page_handled = False
                    currentPageNum = currentPageNum + 1
                except Exception:
                    next_check_at = time.time() + threshold
                    continue
            else:
                next_check_at = time.time() + threshold
                miss_count = miss_count + 1
                continue
        # Last page: handle it if still pending, then stop.
        if currentPageNum == totalPageNum:
            if not page_handled:
                pageHandler(self.driver.page_source, currentPageNum, userID, weiboID)
            break
        # Too many checks without the pager marker: reload and restart.
        if miss_count > 20:
            miss_count = 0
            self.driver.refresh()
            currentPageNum = 1
# ?????
def crawlListPage(self):
    """Walk the ctrip Nanjing hotel list pages and collect their links.

    Opens the list page, sets a 10 second implicit wait and then loops
    with a countdown that starts at 140.  Each round sends END and
    PAGE_UP to the body.  When the page source contains the
    "page loaded" marker text, the page is passed once to
    ``self.__crawllianjie`` and, if the pager text is present, the pager
    link is clicked, the countdown is decreased and the retry counter is
    reset.  A round that did not advance waits three seconds; after 15
    such rounds in a row the loop is abandoned.

    :return: ``True`` if the countdown reached 1 or less, else ``False``.
    """
    self.openPage("http://hotels.ctrip.com/hotel/nanjing12#ctm_ref=hod_hp_sb_lst")
    self.driver.implicitly_wait(10)

    retry_count = 0        # rounds spent on the current page
    page_parsed = False    # current page already passed to the parser
    pages_left = 140

    # NOTE(review): the source had lost its indentation; the nesting
    # below is a reconstruction -- confirm against the original.
    while pages_left >= 1:
        retry_count = retry_count + 1
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
        if u"??" in self.driver.page_source:
            if not page_parsed:
                self.__crawllianjie(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                page_parsed = True
            try:
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_partial_link_text(u"???").click()
                    pages_left = pages_left - 1
                    page_parsed = False
                    retry_count = 0
                    time.sleep(random.uniform(3, 6))
                    print(u"???" + str(pages_left))
            except Exception:
                # `Exception` (not a bare except) keeps Ctrl+C working.
                print("error happen at clicking of nextpage")
        # retry_count != 0 means this round did not move to a new page.
        if retry_count != 0:
            if retry_count < 15:
                time.sleep(3)
                continue
            else:
                break
    return pages_left <= 1
# ??????
def crawlListPage(self):
    """Walk the tuniu hotel list pages for ``self._city`` and collect URLs.

    Opens the list URL with check-in tomorrow and check-out the day
    after, reads the page count from the last ``span.page-num`` link and
    loops with that value as a countdown.  Each round sends END and
    PAGE_UP to the body.  When the page source contains the pager marker
    text, the page is passed once to ``self.__parseUrls`` (after printing
    the first entry's ``pos`` from the page's ``pageData`` JS variable)
    and the "next" element is clicked, the countdown is decreased and the
    retry counter is reset.  A round that did not advance waits three
    seconds; after 15 such rounds in a row the loop is abandoned.

    :return: ``True`` if the countdown reached 1 or less, else ``False``.
    """
    tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
    after_tomorrow = tomorrow + datetime.timedelta(days=1)
    self.openPage(
        "http://hotel.tuniu.com/list/"
        + self._city
        + "p0s0b0"
        + "?checkindate="
        + tomorrow.strftime('%Y-%m-%d')
        + "&checkoutdate="
        + after_tomorrow.strftime('%Y-%m-%d')
    )

    retry_count = 0        # rounds spent on the current page
    page_parsed = False    # current page already passed to the parser
    pages_left = int(self.driver.find_element_by_xpath("//span[@class='page-num'][last()]/a").text)

    # NOTE(review): the source had lost its indentation; the nesting
    # below is a reconstruction -- confirm against the original.
    while pages_left >= 1:
        retry_count += 1
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        self.driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_UP)
        if u"???" in self.driver.page_source:
            if page_parsed is False:
                pageData = self.driver.execute_script('return pageData')
                print(pageData['list'][0]['pos'])
                self.__parseUrls(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                page_parsed = True
            try:
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_xpath("//div[@class='fr page-jump']/span[@class='next']").click()
                    pages_left -= 1
                    page_parsed = False
                    retry_count = 0
                    time.sleep(random.uniform(3, 6))
                    print(pages_left)
            except Exception as e:
                print("error happen at clicking next-page")
                print(e)
        # retry_count != 0 means this round did not move to a new page.
        if retry_count != 0:
            if retry_count < 15:
                time.sleep(3)
                continue
            else:
                break
    return pages_left <= 1
def crawlListPage(self):
    """Walk the elong Nanjing hotel list pages and collect their URLs.

    Opens the list page, reads the hotel total from ``span.t24.mr5`` and
    derives the page countdown as the total divided by 20, rounded up.
    Each round sends END to the body.  When the page source contains the
    pager marker text, the page is passed once to ``self.__parseUrls``;
    the method then polls every 0.1 s until the ``#_loading_`` element is
    absent or its style contains ``none``, clicks the "next" link,
    decreases the countdown and resets the retry counter.  A round that
    did not advance waits one second; after 15 such rounds in a row the
    loop is abandoned.

    :return: ``True`` if the countdown reached 1 or less, else ``False``.
    """
    print('???????')
    self.openPage(
        "http://hotel.elong.com/nanjing/"
    )

    retry_count = 0        # rounds spent on the current page
    page_parsed = False    # current page already passed to the parser
    hotel_total = int(self.driver.find_element_by_xpath("//span[@class='t24 mr5']").text)
    # 20 hotels per page, rounded up; integer maths on Python 2 and 3.
    pages_left = (hotel_total + 19) // 20

    # NOTE(review): the source had lost its indentation; the nesting
    # below is a reconstruction -- confirm against the original.
    while pages_left >= 1:
        retry_count += 1
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)
        if u"???" in self.driver.page_source:
            if not page_parsed:
                self.__parseUrls(self.driver.page_source)
                print(u"???????%d" % len(self.listPageInfo))
                page_parsed = True
            try:
                # Poll until the loading overlay is gone or hidden.
                while True:
                    response = HtmlResponse(url="My HTML String", body=self.driver.page_source, encoding="utf-8")
                    loading_style = response.xpath("//div[@id='_loading_']/@style").extract()
                    if not loading_style or u'none' in loading_style[0]:
                        break
                    time.sleep(0.1)
                if u"???" in self.driver.page_source:
                    self.driver.find_element_by_xpath("//div[@class='paging1']/a[@class='page_next']").click()
                    pages_left -= 1
                    page_parsed = False
                    retry_count = 0
                    time.sleep(random.uniform(1, 3))
            except Exception as e:
                print("error happen at clicking next-page")
                print(e)
        # retry_count != 0 means this round did not move to a new page.
        if retry_count != 0:
            if retry_count < 15:
                time.sleep(1)
                continue
            else:
                break
    return pages_left <= 1