def get_accumulate_product_feat(start_date, end_date):
    """Return per-SKU ratios of type-4 action counts to other action counts.

    The result is cached as a pickle under ``./cache``; when a cache file for
    the given date pair already exists it is loaded and returned without
    recomputation.

    Args:
        start_date: Start of the window, forwarded unchanged to
            ``get_actions`` and embedded in the cache file name.
        end_date: End of the window, forwarded unchanged to ``get_actions``
            and embedded in the cache file name.

    Returns:
        A DataFrame with the columns ``sku_id`` and
        ``product_action_{1,2,3,5,6}_ratio``, where each ratio is the summed
        ``action_4`` indicator divided by the summed ``action_k`` indicator
        for that SKU.

    Raises:
        KeyError: If any of the action types 1-6 never occurs in the data
            returned by ``get_actions`` (the matching dummy column is then
            missing).

    Note:
        A zero denominator is not special-cased; pandas division yields
        ``inf`` (or ``NaN`` for 0/0) in those rows.
    """
    feature = ['sku_id', 'product_action_1_ratio', 'product_action_2_ratio',
               'product_action_3_ratio', 'product_action_5_ratio',
               'product_action_6_ratio']
    dump_path = './cache/product_feat_accumulate_%s_%s.pkl' % (start_date, end_date)

    if os.path.exists(dump_path):
        # Pickle streams are bytes: the file must be opened in binary mode,
        # and the context manager guarantees the handle is closed.
        with open(dump_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    actions = get_actions(start_date, end_date)
    # One indicator column per action type, named 'action_<type>'.
    df = pd.get_dummies(actions['type'], prefix='action')
    actions = pd.concat([actions['sku_id'], df], axis=1)
    # Summing the indicators per SKU gives the count of each action type.
    actions = actions.groupby(['sku_id'], as_index=False).sum()
    actions['product_action_1_ratio'] = actions['action_4'] / actions['action_1']
    actions['product_action_2_ratio'] = actions['action_4'] / actions['action_2']
    actions['product_action_3_ratio'] = actions['action_4'] / actions['action_3']
    actions['product_action_5_ratio'] = actions['action_4'] / actions['action_5']
    actions['product_action_6_ratio'] = actions['action_4'] / actions['action_6']
    actions = actions[feature]

    with open(dump_path, 'wb') as cache_file:
        pickle.dump(actions, cache_file)
    return actions
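# 评论列表 ("comment list") -- web-page navigation text, not part of the code
# 文章目录 ("table of contents") -- web-page navigation text, not part of the code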