medium_topstories_analyzer.py 文件源码-python代码片段

medium_topstories_analyzer.py 文件源码

python

阅读 48 收藏 0 点赞 0 评论 0

项目：Medium-crawler-with-data-analyzer 作者：lifei96 项目源码 | 文件源码

def read_stories_by_tags():
    """Load the daily top-story files and flatten them to one row per tag.

    Walks every date from the module-level ``START_DATE`` through
    ``END_DATE`` (inclusive), reads ``./TopStories/<ISO date>.json`` for each
    day and emits one record for every (story, tag) pair found under the
    file's ``'stories'`` key. Each processed date is printed as a progress
    marker.

    Returns:
        The result of ``pd.read_json`` applied to the JSON-encoded list of
        records (a DataFrame, assuming ``pd`` is pandas). Each record has the
        fields ``top_date``, ``story_id``, ``author``, ``published_date``,
        ``recommends``, ``responses``, ``name``, ``post_count`` and
        ``follower_count``.

    Raises:
        IOError / OSError: if a day's file cannot be opened.
        ValueError: if a file does not contain valid JSON.
        KeyError: if a story or tag lacks one of the expected keys.
    """
    tags = []
    one_day = datetime.timedelta(days=1)
    current_date = START_DATE
    while current_date <= END_DATE:
        top_date = current_date.isoformat()
        # The context manager guarantees the handle is closed even when
        # reading or JSON decoding fails part-way through the date range.
        with open("./TopStories/%s.json" % top_date, 'r') as file_in:
            raw_data = json.loads(file_in.read())
        for raw_story in raw_data['stories']:
            for raw_tag in raw_story['tags']:
                # Story-level fields are repeated on every tag row so the
                # resulting table is flat. Key order fixes the column order.
                tags.append({
                    'top_date': top_date,
                    'story_id': raw_story['story_id'],
                    'author': raw_story['author'],
                    'published_date': raw_story['published_date'],
                    'recommends': raw_story['recommends'],
                    'responses': raw_story['responses'],
                    'name': raw_tag['name'],
                    'post_count': raw_tag['postCount'],
                    'follower_count': raw_tag['metadata']['followerCount'],
                })
        print(top_date)
        current_date = current_date + one_day
    # The JSON round trip is kept on purpose: read_json applies its own
    # dtype inference, which callers may rely on.
    # NOTE(review): recent pandas releases deprecate passing a literal JSON
    # string to read_json; wrap it in a file-like object once the supported
    # Python/pandas versions are confirmed.
    return pd.read_json(json.dumps(tags))