processingData.py 文件源码-python代码片段

processingData.py 文件源码

python

阅读 50 收藏 0 点赞 0 评论 0

项目：market-predictor 作者: bsmitty5000 项目源码文件源码

def read_scraped_jason(filename):
    """Load a scraped-posts JSON file into a cleaned, date-sorted DataFrame.

    Processing steps, in order:

    1. Read ``filename`` with ``pd.read_json``.
    2. Apply ``unlist`` to every cell of every column (``unlist`` is a
       helper defined elsewhere in this module; presumably it unwraps
       single-element lists produced by the scraper -- confirm there).
    3. Keep only the first 10 characters of each ``date`` value and parse
       the column with ``pd.to_datetime``.
    4. Drop rows whose ``keywords`` value duplicates an earlier row.
    5. Sort by ``date``.
    6. Remove the ``body`` and ``title`` columns.
    7. Treat empty-string ``keywords`` as missing, then drop every row
       that contains any missing value.

    Args:
        filename: Path or buffer handed straight to ``pd.read_json``.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        KeyError: If any of the ``date``, ``keywords``, ``body`` or
            ``title`` columns is absent from the loaded data.
    """
    df = pd.read_json(filename)

    for column in df.columns:
        df[column] = df[column].apply(unlist)

    # Keep only the first 10 characters of the date: year/month/day.
    df['date'] = df['date'].apply(lambda x: x[:10])
    df['date'] = pd.to_datetime(df['date'])

    # Remove duplicate posts, if any, then order by post date.
    df = df.drop_duplicates(subset=['keywords'])
    df = df.sort_values(by='date')

    # Use the ``columns=`` keyword: passing the axis positionally
    # (``df.drop('body', 1)``) is rejected by pandas >= 2.0.
    df = df.drop(columns=['body', 'title'])

    # Assign the result back instead of ``inplace=True`` on a column
    # selection; the chained in-place form does not reliably update ``df``
    # (it is a no-op under pandas copy-on-write).
    df['keywords'] = df['keywords'].replace('', np.nan)
    df = df.dropna()

    return df