def gen_topic_queue(self):
    """Enqueue crawl jobs for RSS topics that have not been seen before.

    Reads the topic IDs returned by ``self.topics_id_sqlite()`` and does
    nothing unless more than 2000 of them exist. Otherwise it fetches the
    IDs from ``self.topics_id_rss()`` and, for every ID that is neither in
    the sqlite result nor in the ``.topics_all.json`` cache, enqueues
    ``topic_spider.start(int(topic), self.topic_sleep_time)`` on the
    ``'topic'`` queue bound to ``self.redis_conn``. An ID that occurs more
    than once in the RSS result is enqueued only once. Finally
    ``.topics_all.json`` is rewritten with the de-duplicated union of the
    cached, RSS and sqlite IDs (order is not significant).

    Returns:
        None.
    """
    logging.debug('start topic enqueue')
    topics_sql = self.topics_id_sqlite()
    # NOTE(review): the reason for the 2000-topic threshold is not visible
    # in this function; it is preserved as-is.
    if len(topics_sql) <= 2000:
        return
    topics_rss = self.topics_id_rss()

    # Load the IDs remembered from earlier runs; a missing cache file simply
    # means there is nothing remembered yet.
    try:
        with open('.topics_all.json', 'r', encoding='utf-8') as f:
            tmp_topics = json.load(f)
    except FileNotFoundError:
        tmp_topics = []

    # One set of every already-known ID gives O(1) membership tests instead
    # of scanning two lists for each RSS entry.
    # NOTE(review): IDs are compared by raw value exactly as the helpers
    # return them (no int/str normalisation) - confirm the helpers agree
    # on the type.
    known_topics = set(tmp_topics)
    known_topics.update(topics_sql)

    t_queue = Queue('topic', connection=self.redis_conn)
    for topic in topics_rss:
        if topic in known_topics:
            continue
        # Record the ID immediately so a duplicate later in the same RSS
        # batch is not enqueued a second time.
        known_topics.add(topic)
        t_queue.enqueue(topic_spider.start, int(topic), self.topic_sleep_time)

    # known_topics is now the union of cached, sqlite and RSS IDs.
    with open('.topics_all.json', 'w', encoding='utf-8') as f:
        json.dump(list(known_topics), f)
评论列表
文章目录