def get_baskets(self, prior_or_train, reconstruct = False, reordered = False, none_idx = 49689):
    '''
    Build (or load from the pickle cache) every user's sequence of baskets.

    Parameters
    ----------
    prior_or_train : str
        Forwarded to ``self.get_users_orders`` and embedded in the cache
        file name.
    reconstruct : bool
        When True, ignore any existing cache file and rebuild it.
    reordered : bool
        When True, keep only rows whose ``reordered`` column equals 1, so
        each basket holds just the reordered products.
    none_idx : int
        Placeholder product id; an order left without any product after
        filtering becomes the single-item basket ``[none_idx]``.

    Returns
    -------
    pandas.DataFrame
        One row per ``user_id``. The second column (``reorder_basket`` when
        ``reordered`` is True, otherwise ``basket``) holds a list of baskets
        ordered by ``order_number``; each basket is a list of product ids in
        ascending order.

    Side effects
    ------------
    Writes the result to ``<cache_dir>/[reorder_]basket_<prior_or_train>.pkl``.
    '''
    prefix = 'reorder_basket_' if reordered else 'basket_'
    # os.path.join keeps the path valid whether or not cache_dir ends with a
    # separator; it resolves to the same file the plain concatenation did.
    filepath = os.path.join(self.cache_dir, prefix + prior_or_train + '.pkl')

    if (not reconstruct) and os.path.exists(filepath):
        # NOTE: unpickling can execute arbitrary code -- the cache directory
        # must only ever contain files written by this project.
        with open(filepath, 'rb') as f:
            return pickle.load(f)

    order_keys = ['user_id', 'order_number']
    basket_cols = order_keys + ['product_id']

    # Sorting up front fixes the product order inside every basket.
    up = self.get_users_orders(prior_or_train).sort_values(basket_cols, ascending = True)
    # Remember every (user, order) pair *before* filtering, so orders that
    # lose all their products below still show up in the result.
    uid_oid = up[order_keys].drop_duplicates()
    if reordered:
        up = up[up.reordered == 1]
    up = up[basket_cols]

    up_basket = up.groupby(order_keys)['product_id'].apply(list).reset_index()
    up_basket = pd.merge(uid_oid, up_basket, on = order_keys, how = 'left')
    # Orders with no surviving product come out of the left merge as NaN;
    # replace them with a placeholder basket in a single pass. Testing for
    # "is a list" avoids depending on the dtype of the merged column.
    up_basket['product_id'] = up_basket['product_id'].apply(
        lambda basket: basket if isinstance(basket, list) else [none_idx])

    up_basket = up_basket.sort_values(order_keys, ascending = True)
    up_basket = up_basket.groupby(['user_id'])['product_id'].apply(list).reset_index()
    up_basket.columns = ['user_id', 'reorder_basket'] if reordered else ['user_id', 'basket']

    with open(filepath, 'wb') as f:
        pickle.dump(up_basket, f, pickle.HIGHEST_PROTOCOL)
    return up_basket
评论列表
文章目录