def get_accumulate_brand_feat(start_date, end_date):
    """Build per-brand action-ratio features for a date window, with CSV caching.

    The result is cached at ``./cache/brand_feat_accumulate_<start>_<end>.csv``.
    If that file exists it is loaded; otherwise the features are computed from
    ``get_actions(start_date, end_date)`` and written to the cache.

    For every brand the returned frame holds:

    * ``brand_action_<k>_ratio`` (k in 1, 2, 3, 5, 6): number of type-4 actions
      divided by the number of type-k actions.  An infinite ratio (type-k
      count is zero while type-4 count is not) is replaced by 9999; a 0/0
      ratio stays NaN.
    * ``brand_action_num``: total number of actions of types 1..6.

    Args:
        start_date: Start of the window; passed to ``get_actions`` and used in
            the cache file name.
        end_date: End of the window; passed to ``get_actions`` and used in
            the cache file name.

    Returns:
        A pandas DataFrame with the columns listed in ``feature`` below.
    """
    feature = ['brand', 'brand_action_1_ratio', 'brand_action_2_ratio',
               'brand_action_3_ratio', 'brand_action_5_ratio',
               'brand_action_6_ratio', 'brand_action_num']
    dump_path = './cache/brand_feat_accumulate_%s_%s.csv' % (start_date, end_date)
    # os.path.exists is the file-existence check; os._exists is a private
    # helper that only looks up names inside the os module, so the cache
    # would never be hit with it.
    if os.path.exists(dump_path):
        # Re-select the feature columns so cache files that were written with
        # an index column ("Unnamed: 0") still load with the expected schema.
        actions = pd.read_csv(dump_path)[feature]
    else:
        # get_actions must return a frame with at least 'brand' and 'type'.
        actions = get_actions(start_date, end_date)
        df = pd.get_dummies(actions['type'], prefix='action')
        # get_dummies only creates columns for types present in the window;
        # add the absent ones as zero counts so the ratios below do not raise
        # KeyError.  NOTE(review): assumes 'type' holds the integers 1..6 so
        # the dummy columns are named action_1..action_6 -- confirm.
        for col in ['action_%d' % i for i in range(1, 7)]:
            if col not in df.columns:
                df[col] = 0
        actions = pd.concat([actions['brand'], df], axis=1)
        actions = actions.groupby(['brand'], as_index=False).sum()
        for k in (1, 2, 3, 5, 6):
            actions['brand_action_%d_ratio' % k] = (
                actions['action_4'] / actions['action_%d' % k])
        actions['brand_action_num'] = (
            actions['action_1'] + actions['action_2'] + actions['action_3']
            + actions['action_4'] + actions['action_5'] + actions['action_6'])
        actions = actions[feature]
        # DataFrame.replace returns a new frame; keep the result so the
        # infinities are actually removed before caching/returning.
        actions = actions.replace(np.inf, 9999)
        # index=False keeps the cached schema identical to the computed one.
        actions.to_csv(dump_path, index=False)
    return actions
# Comment list
# Table of contents