rss_spider.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:v2ex_delete 作者: yingziwu 项目源码 文件源码
def gen_topic_queue(self):
        logging.debug('start topic enqueue')
        topics_sql=self.topics_id_sqlite()
        if len(topics_sql) <= 2000:
            return
        topics_rss=self.topics_id_rss()
        # load topics
        if os.path.exists('.topics_all.json'):
            with open('.topics_all.json','r') as f:
                tmp_topics=json.load(f)
        else:
            tmp_topics=list()
        t_queue=Queue('topic',connection=self.redis_conn)
        # gen queue
        for topic in topics_rss:
            if topic not in topics_sql and topic not in tmp_topics:
                topic_id=int(topic)
                t_queue.enqueue(topic_spider.start,topic_id, self.topic_sleep_time)
        #save topics
        topics_all=list()
        topics_all.extend(tmp_topics)
        topics_all.extend(topics_rss)
        topics_all.extend(topics_sql)
        topics_all=list(set(topics_all))
        with open('.topics_all.json','w') as f:
            json.dump(topics_all, f)
        return
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号