def load_base_user_feat(end_date='2016-04-16'):
    """Load (or build and cache) the basic per-user feature table.

    On a cache hit, the DataFrame pickled at
    ``./cache/base_user_feat_<end_date[:10]>.pkl`` is returned as-is.
    Otherwise the table is built from ``USER_FILE`` (read as a GBK-encoded
    CSV), written to that cache path, and returned.

    Registration times that are missing, or later than 2016-04-15, are
    replaced by 2016-02-01 before the distance is computed.

    Args:
        end_date: Reference date string. ``reg_tm_dist`` is measured back
            from it; only its first 10 characters are used in the cache
            file name.

    Returns:
        pandas.DataFrame with the columns ``user_id``, ``user_lv_cd`` and
        ``reg_tm_dist`` (the ``.days`` component of
        ``end_date - user_reg_tm``).
    """
    dump_path = './cache/base_user_feat_{0}.pkl'.format(end_date[:10])
    if os.path.exists(dump_path):
        # NOTE(review): unpickling can execute arbitrary code. This is only
        # safe while the cache directory contains nothing but files written
        # by this function.
        with open(dump_path, 'rb') as f:
            return pickle.load(f)

    fallback_reg_tm = pd.to_datetime('2016-02-01')
    latest_valid_reg_tm = pd.to_datetime('2016-04-15')

    df = pd.read_csv(USER_FILE, encoding='gbk')

    # Work on an explicit Series instead of a chained in-place fillna on
    # ``df.user_reg_tm``: the chained form is not guaranteed to write back
    # into ``df`` (it is a no-op under pandas Copy-on-Write).
    reg_tm = pd.to_datetime(df['user_reg_tm']).fillna(fallback_reg_tm)
    # Registration times after the latest valid date fall back to the
    # same default as missing ones.
    reg_tm = reg_tm.mask(reg_tm > latest_valid_reg_tm, fallback_reg_tm)

    df['reg_tm_dist'] = (pd.to_datetime(end_date) - reg_tm).dt.days
    # Only these three columns are kept. One-hot encodings of sex/age were
    # sketched here in commented-out code and are not part of the output.
    df = df[['user_id', 'user_lv_cd', 'reg_tm_dist']]

    # Create the cache directory on first use so the dump cannot fail with
    # FileNotFoundError.
    os.makedirs(os.path.dirname(dump_path), exist_ok=True)
    with open(dump_path, 'wb') as f:
        pickle.dump(df, f)
    return df
features_generator.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录