DriverService.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:ugc.aggregator 作者: Dreamcatcher-GIS 项目源码 文件源码
def crawlUserWeibo(self,url=None,pageHandler = None,threshold=0.3):
        """Scroll through a user's Weibo pages and pass each page to a handler.

        The method opens ``url`` in ``self.driver``, then loops forever:
        it keeps sending the END key to the page body and, every
        ``threshold`` seconds, checks the page source for a marker string.
        When the marker is present the page source is passed to
        ``pageHandler`` (once per page) and the link carrying the marker
        text is clicked to move to the next page.

        This method never returns normally; it only stops when an
        exception propagates out of it (for example from a driver call
        outside the ``try`` block, from ``pageHandler``, or from a
        KeyboardInterrupt).

        NOTE(review): the marker literals below are ``"???"`` in this copy
        of the source. Presumably the original non-ASCII text (likely the
        pager / "next page" link label) was lost in an encoding
        round-trip -- confirm against the upstream file before relying on
        this method.

        :param url: page to open; defaults to ``"http://weibo.com/"``
            followed by ``self.userInfo["id"]``.
        :param pageHandler: callable invoked as
            ``pageHandler(page_source, pageNum)``; defaults to
            ``self.pageHandler_weibo``.
        :param threshold: number of seconds between two checks of the
            page source for the marker.
        """
        if url is None:
            url = "http://weibo.com/"+str(self.userInfo["id"])
        if pageHandler is None:
            pageHandler = self.pageHandler_weibo
        # Open the start page and pause briefly before interacting with it.
        self.driver.get(url)
        time.sleep(2)

        thresholdTime = time.time()+threshold
        loopNum = 0        # checks without the marker since the last reset
        pageNum = 1        # number passed to pageHandler for the current page
        ifHandle = False   # True once the current page was given to pageHandler
        while True:
            # Send END to the body on every iteration to scroll to the bottom.
            self.driver.find_element_by_tag_name("body").send_keys(Keys.END)

            # Only inspect the page source once the threshold has elapsed.
            if time.time()>thresholdTime:
                if u"???" in self.driver.page_source:
                    # Handle each page only once, even if the click below
                    # fails and this branch is entered again.
                    if not ifHandle:
                        pageHandler(self.driver.page_source, pageNum)
                        ifHandle = True
                    try:
                        self.driver.find_element_by_partial_link_text("???").click()
                        loopNum = 0
                        ifHandle = False
                        pageNum = pageNum+1
                        # Parenthesised single argument: same output under
                        # the Python 2 print statement and Python 3 print().
                        print("page:"+str(pageNum))
                    except Exception:
                        # Best-effort retry: the link could not be found or
                        # clicked, so wait one more threshold and try again.
                        # `Exception` (not a bare except) lets
                        # KeyboardInterrupt/SystemExit stop this endless loop.
                        thresholdTime = time.time()+threshold
                        continue
                else:
                    # Marker not present yet: schedule the next check and
                    # count this miss.
                    thresholdTime = time.time()+threshold
                    loopNum = loopNum + 1
                    continue

            # After more than 10 misses, reload the page and start counting
            # again.
            if loopNum>10:
                loopNum = 0
                self.driver.refresh()

    # ????????????????ID???????
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号