features_generator.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:JData-algorithm-competition 作者: wrzto 项目源码 文件源码
def load_UCPair_onlyact(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', cate=[8]):
    '''
    Build the distinct (user_id, cate) pairs seen in the action log for the
    requested categories, flagging users who touched only one category.

    Parameters:
        start_date: lower bound of the action window, forwarded unchanged to
            get_action_data.
        end_date: upper bound of the action window, forwarded unchanged to
            get_action_data.
        cate: category id, or list-like of category ids, to keep in the
            result. Defaults to [8]; None is treated as the default.

    Returns:
        DataFrame with columns ['user_id', 'cate', 'ncate'], one row per
        distinct (user_id, cate) pair whose cate is in `cate`. 'ncate' is 1
        when the user appears with exactly one distinct category in the
        whole window, otherwise 0.

    NOTE(review): assumes get_action_data returns a DataFrame holding the
    requested 'user_id' and 'cate' columns -- confirm against its definition.
    '''
    # The default list is never mutated here, so sharing it across calls is safe.
    if cate is None:
        cate = [8]
    # Accept a bare scalar as well as a list-like of category ids.
    wanted_cates = list(cate) if pd.api.types.is_list_like(cate) else [cate]

    df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'cate'])
    # One row per distinct (user_id, cate) pair, so the group size below is
    # the number of distinct categories per user.
    df = df.drop_duplicates()
    temp = df.groupby(['user_id']).size().reset_index(name='ncate')
    df = pd.merge(df, temp, on=['user_id'], how='left')

    # Filter on the requested categories (previously hard-coded to 8) and take
    # an explicit copy so the column assignment below does not write to a view.
    df = df[df['cate'].isin(wanted_cates)].copy()
    # Turn the category count into a 0/1 "only one category" indicator.
    df['ncate'] = (df['ncate'] == 1).astype(int)

    return df[['user_id', 'cate', 'ncate']]

# def get_uid_label(start_date = '2016-02-01 00:00:00', end_date = '2016-04-15 00:00:00'):
#     dump_path = './cache/uid_label_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
#     if os.path.exists(dump_path):
#         with open(dump_path, 'rb') as f:
#             df = pickle.load(f)
#     else:
#         df = get_action_data(start_date=start_date, end_date=end_date, field=['user_id', 'type'])
#         df = df[df.type==4].user_id.drop_duplicates().to_frame()
#         with open(dump_path, 'wb') as f:
#             pickle.dump(df, f)
#     return df
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号