def read_stories_without_tags():
    """Load the daily top-story files into a single DataFrame, without tag names.

    For every day from ``START_DATE`` through ``END_DATE`` (inclusive) the
    file ``./TopStories/<iso-date>.json`` is read. Each file must be a JSON
    object with a ``'stories'`` list; one output row is produced per story.
    Only the number of tags is kept (``tags_count``), not the tags themselves.
    The ISO date of each processed day is printed as a progress indicator.

    NOTE(review): ``START_DATE`` and ``END_DATE`` are module-level values
    defined outside this block; they are presumably ``datetime.date``
    objects (they must support ``isoformat()`` and ``+ timedelta``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``top_date``, ``story_id``,
        ``author``, ``published_date``, ``recommends``, ``responses`` and
        ``tags_count``.

    Raises:
        FileNotFoundError: If a day's JSON file does not exist.
        KeyError: If a file lacks ``'stories'`` or a story lacks one of the
            copied fields.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    # Imported locally so this function does not rely on a module-level import.
    import io

    stories = []
    one_day = datetime.timedelta(days=1)
    current_date = START_DATE
    while current_date <= END_DATE:
        day_label = current_date.isoformat()
        # The context manager closes the file even if parsing fails; JSON is
        # UTF-8 by specification, so do not rely on the platform default.
        with open("./TopStories/%s.json" % day_label, 'r',
                  encoding='utf-8') as file_in:
            raw_data = json.load(file_in)
        for raw_story in raw_data['stories']:
            stories.append({
                'top_date': day_label,
                'story_id': raw_story['story_id'],
                'author': raw_story['author'],
                'published_date': raw_story['published_date'],
                'recommends': raw_story['recommends'],
                'responses': raw_story['responses'],
                'tags_count': len(raw_story['tags']),
            })
        print(day_label)
        current_date = current_date + one_day
    # Keep the JSON round-trip so pandas applies the same dtype inference as
    # before; wrap the text in StringIO because passing a literal JSON string
    # to read_json is deprecated in recent pandas releases.
    return pd.read_json(io.StringIO(json.dumps(stories)))
medium_topstories_analyzer.py 文件源码
python
阅读 34
收藏 0
点赞 0
评论 0
评论列表
文章目录