def load_user_action_cnt(start_date='2016-02-01 00:00:00', end_date='2016-04-16 00:00:00'):
    """Return per-user counts of each action type, cached on disk.

    On a cache miss the action records are fetched through
    ``get_action_data`` (requested fields: ``user_id``, ``time``, ``type``),
    the ``type`` column is one-hot encoded, and the indicator columns are
    summed per ``user_id``. The result is pickled under ``./cache/`` and
    reused on later calls.

    Args:
        start_date: Start of the window as a ``'YYYY-MM-DD HH:MM:SS'`` string.
        end_date: End of the window, same format. Both values are forwarded
            unchanged to ``get_action_data``; how that helper treats the
            bounds is not visible here -- TODO confirm.

    Returns:
        On a cache miss, a DataFrame with one row per ``user_id`` and one
        column per distinct action type, named
        ``Action_cnt_<start>_<end>_<type>``. On a cache hit, whatever object
        the cache file holds.

    Note:
        The cache file name uses only the first 10 characters (the date
        part) of each bound, so two windows that differ only in time of day
        share one cache entry.
    """
    dump_path = './cache/user_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])

    # Cache hit: hand back the stored result without touching the raw data.
    if os.path.exists(dump_path):
        # NOTE(security): unpickling can execute arbitrary code; this is only
        # acceptable because the cache file is produced by this function.
        with open(dump_path, 'rb') as f:
            return pickle.load(f)

    df = get_action_data(start_date=start_date, end_date=end_date, field=['user_id', 'time', 'type'])

    # One indicator column per action type; summing the indicators per user
    # turns them into occurrence counts.
    prefix = 'Action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
    type_dummies = pd.get_dummies(df['type'], prefix=prefix)
    df = pd.concat([df, type_dummies], axis=1).drop(['time', 'type'], axis=1)
    df = df.groupby(['user_id'], as_index=False).sum()

    # Create the cache directory if needed, so a missing ./cache/ does not
    # throw away the aggregation that was just computed.
    os.makedirs(os.path.dirname(dump_path), exist_ok=True)
    with open(dump_path, 'wb') as f:
        pickle.dump(df, f)
    return df
features_generator.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录