def read_stories_by_tags():
    """Load the daily top-story files and flatten them to one row per tag.

    For every day from ``START_DATE`` through ``END_DATE`` (inclusive), reads
    ``./TopStories/<current_date.isoformat()>.json``, walks the ``stories``
    list in it, and emits one record for each entry in a story's ``tags``
    list. Each record carries the story-level fields (``story_id``,
    ``author``, ``published_date``, ``recommends``, ``responses``) together
    with the tag-level fields (``name``, ``post_count``, ``follower_count``)
    and the ``top_date`` of the file it came from.

    Prints the ISO date of each day after its file has been processed.

    Returns:
        The object produced by ``pd.read_json`` for the collected records
        (a DataFrame with one row per story/tag pair).

    Raises:
        OSError: If a day's file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If a story or tag lacks one of the expected keys.
    """
    # Imported locally so the function does not depend on a file-level import.
    from io import StringIO

    one_day = datetime.timedelta(days=1)
    tags = []
    current_date = START_DATE
    while current_date <= END_DATE:
        top_date = current_date.isoformat()

        # The context manager closes the file even if parsing fails.
        with open("./TopStories/%s.json" % top_date, 'r') as file_in:
            raw_data = json.load(file_in)

        for raw_story in raw_data['stories']:
            for raw_tag in raw_story['tags']:
                tags.append({
                    'top_date': top_date,
                    'story_id': raw_story['story_id'],
                    'author': raw_story['author'],
                    'published_date': raw_story['published_date'],
                    'recommends': raw_story['recommends'],
                    'responses': raw_story['responses'],
                    'name': raw_tag['name'],
                    'post_count': raw_tag['postCount'],
                    'follower_count': raw_tag['metadata']['followerCount'],
                })

        # Progress indicator: one line per processed day.
        print(top_date)
        current_date = current_date + one_day

    # read_json is kept (rather than building the frame directly) so its type
    # inference still applies; the StringIO wrapper avoids passing a literal
    # JSON string, which newer pandas versions deprecate.
    return pd.read_json(StringIO(json.dumps(tags)))
medium_topstories_analyzer.py 文件源码
python
阅读 34
收藏 0
点赞 0
评论 0
评论列表
文章目录