def load_brand_comment_ratio(end_date='2016-04-01 00:00:00'):
    '''
    Return the mean bad-comment rate of each brand, cached on disk.

    The result is cached as a pickle under ``./cache``; the file name is
    keyed by the first 10 characters of ``end_date`` (the date part of a
    ``YYYY-MM-DD HH:MM:SS`` string). On a cache hit the pickled object is
    returned as-is; otherwise the table is rebuilt from the item features
    and the action data, written to the cache, and returned.

    Args:
        end_date: Timestamp string forwarded as ``end_date`` to both
            ``load_base_item_feat`` and ``get_action_data``. The action
            data is always requested from '2016-02-01 00:00:00' onwards.

    Returns:
        A DataFrame with the columns ``brand`` and
        ``brand_bad_comment_rate``, the latter being the mean of
        ``bad_comment_rate`` over the merged rows of each brand.
    '''
    dump_path = './cache/brand_comment_ratio_{0}.pkl'.format(end_date[:10])

    if os.path.exists(dump_path):
        # NOTE: pickle.load can execute arbitrary code; only safe as long as
        # the cache directory holds files written by this project itself.
        with open(dump_path, 'rb') as f:
            return pickle.load(f)

    # Cache miss: rebuild the per-brand table.
    item_feat = load_base_item_feat(end_date=end_date)
    item_feat = item_feat[['sku_id', 'bad_comment_rate']]

    # sku_id -> brand mapping taken from the action log; the log presumably
    # repeats each pair many times, hence the de-duplication.
    brands = get_action_data(
        start_date='2016-02-01 00:00:00',
        end_date=end_date,
        field=['sku_id', 'brand'],
    )
    brands = brands.drop_duplicates()

    # Left join keeps every item; items with no matching action row get a NaN
    # brand, which pandas groupby leaves out of the result by default.
    df = pd.merge(item_feat, brands, on=['sku_id'], how='left')
    df = df[['brand', 'bad_comment_rate']]
    df = df.groupby(['brand'], as_index=False).mean()
    df.columns = ['brand', 'brand_bad_comment_rate']

    # Make sure the cache directory exists so a first run in a fresh checkout
    # does not fail on open() after all the work above is already done.
    cache_dir = os.path.dirname(dump_path)
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(dump_path, 'wb') as f:
        pickle.dump(df, f)
    return df
# features_generator.py -- file source code
# python
# Views: 27
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Article table of contents