def load_user_act_cnt_with_timeZone(start_date='2016-02-01 00:00:00', end_date='2016-04-16 00:00:00'):
    '''
    Load, or build and cache, per-user row counts for each ``time_zone`` value.

    On a cache miss the rows returned by ``get_action_data`` are one-hot
    encoded on ``time_zone`` (columns prefixed ``time_zone_cnt``), grouped by
    ``user_id`` and summed, so each output column holds the number of input
    rows the user has with that ``time_zone`` value. The result is pickled
    under ``./cache`` and reused on later calls.

    Args:
        start_date: Start of the window, passed through to ``get_action_data``.
            Only its first 10 characters are used in the cache file name.
        end_date: End of the window, passed through to ``get_action_data``.
            Only its first 10 characters are used in the cache file name.

    Returns:
        The cached object if the cache file exists; otherwise the freshly
        aggregated frame with ``user_id`` plus one ``time_zone_cnt_*`` column
        per distinct ``time_zone`` value.

    NOTE(review): presumably ``get_action_data`` returns a pandas DataFrame
    with one row per action and whether the window bounds are inclusive is
    decided there -- confirm against that helper.
    '''
    # The cache key keeps only the first 10 characters of each bound, so two
    # calls whose bounds differ only after that prefix share one cache file.
    dump_path = './cache/user_act_cnt_with_timeZone_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])

    if os.path.exists(dump_path):
        # NOTE: unpickling can execute arbitrary code; this is only safe as
        # long as ./cache holds files written by this project.
        with open(dump_path, 'rb') as f:
            return pickle.load(f)

    df = get_action_data(start_date=start_date, end_date=end_date, field=['user_id', 'time_zone'])

    # One indicator column per distinct time_zone value; summing them per
    # user turns the indicators into counts.
    timeZone_dummies = pd.get_dummies(df.time_zone, prefix='time_zone_cnt')
    df = pd.concat([df, timeZone_dummies], axis=1)
    df = df.drop(['time_zone'], axis=1)
    df = df.groupby(['user_id'], as_index=False).sum()

    # Create the cache directory on first use so the dump below cannot fail
    # with a missing-directory error after the aggregation was already done.
    cache_dir = os.path.dirname(dump_path)
    if cache_dir and not os.path.isdir(cache_dir):
        try:
            os.makedirs(cache_dir)
        except OSError:
            # Another process may have created it in the meantime.
            if not os.path.isdir(cache_dir):
                raise

    with open(dump_path, 'wb') as f:
        pickle.dump(df, f)
    return df
features_generator.py 文件源码
python
阅读 31
收藏 0
点赞 0
评论 0
评论列表
文章目录