MHRW.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:Crawling-SinaWeibo 作者: Uzumaki-C 项目源码 文件源码
def parse3_fans(self, response):
        """Collect user IDs from one page of a fans list and follow pagination.

        Every link found under ``body//table/tr/td/a`` whose href contains
        ``uid=<digits>`` contributes that numeric ID to ``self.friends_id``.

        If the page has a "next page" link (anchor text ``\\u4e0b\\u9875``) a
        request for it is yielded with this method as the callback. Otherwise
        ``self.fans_finish`` is set, and once ``self.follows_finish`` is also
        truthy a request back to ``url_main`` is yielded for ``self.parse``.

        Args:
            response: The page response; ``response.meta`` must carry the
                keys ``"url_main"`` and ``"ID"``, which are forwarded to the
                follow-up requests.

        Yields:
            At most one ``Request``: either the next fans page or the return
            to ``url_main``.
        """
        selector = Selector(response)
        hrefs = selector.xpath('body//table/tr/td/a/@href').extract()
        url_main = response.meta["url_main"]
        ID_ = response.meta["ID"]
        for href in hrefs:
            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            match = re.search(r'uid=(\d+)', href)
            if match:
                # friends_id is assumed to be a set (it is only used via
                # .add() here), so adding an existing ID is a no-op and no
                # prior membership test is needed.
                self.friends_id.add(int(match.group(1)))
        url_next = selector.xpath(
            u'body//div[@class="pa" and @id="pagelist"]/form/div/a[text()="\u4e0b\u9875"]/@href').extract()
        if url_next:
            # More pages remain: keep crawling with the same meta payload.
            yield Request(url="http://weibo.cn%s" % url_next[0], meta={"url_main":url_main,"ID":ID_}, callback=self.parse3_fans)
        else:
            # Last page reached. Only hand control back to parse() when the
            # follows side has finished as well.
            self.fans_finish = True
            if self.follows_finish:
                # dont_filter=True because url_main is requested again here.
                yield Request(url=url_main, meta={"ID":ID_}, dont_filter=True, callback=self.parse)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号