def crawlWeiboContent(self, userID, weiboID, pageHandler=None, threshold=0.2):
    """Open a Weibo post in the browser and feed each comment page to a handler.

    The post at ``http://weibo.com/<userID>/<weiboID>`` is loaded through
    ``self.driver``. The method then repeatedly scrolls to the bottom of the
    page, waits until the pager marker shows up in the page source, passes
    the page source to ``pageHandler`` and clicks through to the next page,
    until the computed last page has been handled.

    Args:
        userID: String used as the first path segment of the post URL.
        weiboID: String used as the second path segment of the post URL.
        pageHandler: Callable invoked as
            ``pageHandler(page_source, page_number, userID, weiboID)`` for
            each page visited. Defaults to ``self.pageHandler_comment``.
            After a refresh the walk restarts at page 1, so a page may be
            handed over more than once.
        threshold: Minimum number of seconds between two inspections of the
            page source for the pager marker; also used as the pause between
            attempts to read the comment count.

    Returns:
        None.
    """
    # NOTE(review): the indentation of this method was lost in the copy this
    # was reviewed from; the nesting below was rebuilt from the control flow.
    # Confirm against an intact copy of the file.
    if pageHandler is None:
        pageHandler = self.pageHandler_comment

    url = "http://weibo.com/" + userID + "/" + weiboID
    print(url)
    self.driver.get(url)
    time.sleep(1)

    # Retry until the comment count can be extracted from the page source.
    # Only Exception is caught so Ctrl-C still stops the crawler, and the
    # short pause keeps the retry from hammering the driver.
    # Assumes __getCommentNum returns an int and that a page holds 20
    # comments -- TODO confirm.
    while True:
        try:
            totalPageNum = self.__getCommentNum(self.driver.page_source) // 20 + 1
            break
        except Exception:
            time.sleep(max(threshold, 0))

    thresholdTime = time.time() + threshold
    currentPageNum = 1
    loopNum = 0        # consecutive checks in which the pager marker was missing
    ifHandle = False   # True once the current page was passed to pageHandler

    while currentPageNum <= totalPageNum:
        # Scroll to the bottom of the page on every pass.
        self.driver.find_element_by_tag_name("body").send_keys(Keys.END)

        if time.time() > thresholdTime:
            # NOTE(review): the two "???" literals below look encoding-damaged;
            # presumably they were the pager's next-page link text. Restore
            # them from an intact copy of the source.
            if u"???" in self.driver.page_source:
                # Hand the page over only once, even if the click is retried.
                if not ifHandle:
                    pageHandler(self.driver.page_source, currentPageNum, userID, weiboID)
                    ifHandle = True
                try:
                    self.driver.find_element_by_partial_link_text("???").click()
                except Exception:
                    # Link not clickable yet: wait another interval and retry.
                    thresholdTime = time.time() + threshold
                    continue
                else:
                    time.sleep(1)
                    loopNum = 0
                    ifHandle = False
                    currentPageNum = currentPageNum + 1
            else:
                # Marker not present yet: count the miss and look again later.
                thresholdTime = time.time() + threshold
                loopNum = loopNum + 1
                continue

        # On the last page, hand the page over if that has not happened yet
        # and stop.
        if currentPageNum == totalPageNum:
            if not ifHandle:
                pageHandler(self.driver.page_source, currentPageNum, userID, weiboID)
            break

        # Too many misses in a row: refresh the page and restart from page 1.
        if loopNum > 20:
            loopNum = 0
            self.driver.refresh()
            currentPageNum = 1
# ?????
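# 评论列表 (comment list; appears to be web page navigation text, not code)
# 文章目录 (table of contents; appears to be web page navigation text, not code)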