medium_topstories_analyzer.py 文件源码-python代码片段

medium_topstories_analyzer.py 文件源码

python

阅读 41 收藏 0 点赞 0 评论 0

项目：Medium-crawler-with-data-analyzer 作者: lifei96 项目源码文件源码

def read_stories_without_tags():
    """Load the per-day top-story files into one table, replacing tags by a count.

    Walks every day from the module-level ``START_DATE`` through ``END_DATE``
    (inclusive), reads ``./TopStories/<YYYY-MM-DD>.json`` relative to the
    current working directory, and flattens each entry of its ``'stories'``
    list into one record. Each processed date is printed as a progress marker.

    NOTE(review): ``START_DATE`` / ``END_DATE`` are defined outside this
    function; they are presumably ``datetime.date`` objects (they must support
    ``isoformat()``, ``<=`` and ``+ timedelta``) -- confirm at the definition.

    Returns:
        The object produced by ``pd.read_json`` for the collected records
        (presumably a pandas DataFrame, one row per story per day) with the
        columns ``top_date``, ``story_id``, ``author``, ``published_date``,
        ``recommends``, ``responses`` and ``tags_count``.

    Raises:
        OSError: if the file for any day in the range cannot be opened.
        ValueError: if a file does not contain valid JSON.
        KeyError: if a file lacks ``'stories'`` or a story lacks one of the
            fields read below.
    """
    # Local import: the file's import block is not visible from here, and
    # StringIO is needed because pandas deprecated passing a literal JSON
    # string to read_json.
    import io

    stories = []
    one_day = datetime.timedelta(days=1)
    current_date = START_DATE
    while current_date <= END_DATE:
        date_str = current_date.isoformat()
        # The context manager closes the handle even when parsing fails.
        with open("./TopStories/%s.json" % date_str, 'r',
                  encoding='utf-8') as file_in:
            raw_data = json.load(file_in)
        for raw_story in raw_data['stories']:
            stories.append({
                'top_date': date_str,
                'story_id': raw_story['story_id'],
                'author': raw_story['author'],
                'published_date': raw_story['published_date'],
                'recommends': raw_story['recommends'],
                'responses': raw_story['responses'],
                # Only the number of tags is kept, not the tags themselves.
                'tags_count': len(raw_story['tags']),
            })
        print(date_str)
        current_date = current_date + one_day
    # Round-trip through read_json (rather than building the frame directly)
    # to keep the same type inference the original code relied on.
    return pd.read_json(io.StringIO(json.dumps(stories)))