def load_UCPair_onlyact(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', cate=[8]):
    '''
    Return (user_id, cate) pairs for the requested categories, flagged by
    whether the user appears with exactly one category in the window.

    Parameters
    ----------
    start_date, end_date : str
        Passed straight through to ``get_action_data`` to bound the action
        records that are loaded.
    cate : list-like or scalar, default [8]
        Category value(s) to keep in the result. A scalar is treated as a
        one-element list. The default list is only read, never mutated.

    Returns
    -------
    DataFrame with columns ``user_id``, ``cate`` and ``ncate``, where
    ``ncate`` is 1 when the user has a single de-duplicated row in the
    loaded data and 0 otherwise.

    NOTE(review): this assumes ``get_action_data`` returns only the
    requested ``user_id`` and ``cate`` columns, so that the number of
    de-duplicated rows per user equals the number of distinct categories
    the user touched -- confirm against that helper.
    '''
    actions = get_action_data(start_date=start_date, end_date=end_date,
                              field=['user_id', 'cate'])
    pairs = actions.drop_duplicates()

    # Count the de-duplicated rows of every user *before* filtering, so the
    # count reflects all categories the user touched, not just the kept ones.
    per_user = pairs.groupby(['user_id']).size().reset_index(name='ncate')
    pairs = pd.merge(pairs, per_user, on=['user_id'], how='left')

    # Honour the ``cate`` argument (previously ignored in favour of a
    # hard-coded 8); accept a bare scalar as well as a list-like.
    wanted = cate if pd.api.types.is_list_like(cate) else [cate]

    # ``.copy()`` makes the filtered frame independent, so the column
    # assignment below does not write into a slice of ``pairs``.
    selected = pairs[pairs['cate'].isin(wanted)].copy()

    # Turn the raw count into a 0/1 "only one category" indicator.
    selected['ncate'] = (selected['ncate'] == 1).astype(int)
    return selected[['user_id', 'cate', 'ncate']]
# def get_uid_label(start_date = '2016-02-01 00:00:00', end_date = '2016-04-15 00:00:00'):
# dump_path = './cache/uid_label_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
# if os.path.exists(dump_path):
# with open(dump_path, 'rb') as f:
# df = pickle.load(f)
# else:
# df = get_action_data(start_date=start_date, end_date=end_date, field=['user_id', 'type'])
# df = df[df.type==4].user_id.drop_duplicates().to_frame()
# with open(dump_path, 'wb') as f:
# pickle.dump(df, f)
# return df
features_generator.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录