weibo_crawler.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:accurate_weibo_crawler 作者: wdwind 项目源码 文件源码
def get_all_weibos(self, uid, num_weibos, max_id, since_id):
        all_weibos = {}
        all_weibos = weibo_util.user_timeline_all(uid, num_weibos, all_weibos, weibo_util.user_timeline_public)
        times = 0
        while len(all_weibos) != num_weibos:
            logger.log('[x] Crawling... %d/%d' % (len(all_weibos), num_weibos))
            all_weibos = weibo_util.user_timeline_all(uid, num_weibos, all_weibos, weibo_util.user_timeline_public, count=200)
            times += 1
            if times == 2:
                break
        times = 0
        while len(all_weibos) != num_weibos:
            logger.log('[x] Crawling... %d/%d' % (len(all_weibos), num_weibos))
            all_weibos = weibo_util.user_timeline_all(uid, num_weibos, all_weibos, weibo_util.user_timeline_private)
            times += 1
            if times == 1:
                break
        times = 0
        while len(all_weibos) != num_weibos:
            logger.log('[x] Crawling... %d/%d' % (len(all_weibos), num_weibos))
            all_weibos = weibo_util.user_timeline_all_since(uid, since_id, num_weibos, all_weibos, weibo_util.user_timeline_public, count=200)
            times += 1
            if times == 2:
                break
        # comment this part because the `since_id` argument does not work in private api
        #times = 0
        #while len(all_weibos) != num_weibos:
        #    logger.log('[x] Crawling... %d/%d' % (len(all_weibos), num_weibos))
        #    all_weibos = weibo_util.user_timeline_all_since(uid, since_id, num_weibos, all_weibos, weibo_util.user_timeline_private)
        #    times += 1
        #    if times == 1:
        #        break
        logger.log('[x] Crawling... %d/%d' % (len(all_weibos), num_weibos))
        return all_weibos
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号