def get_item_history(self, prior_or_train, reconstruct=False, none_idx=49689):
    """Build (or load from cache) each user's per-order purchase history.

    For every order of a user, the "history" is the de-duplicated list of
    product ids bought in all of that user's *earlier* orders. The first
    order a user has in the data gets the placeholder history
    ``[none_idx]``. ``none_idx`` is also appended to the basket of any
    order whose ``order_number`` is 1, so it is part of every later
    history of that user.

    Args:
        prior_or_train: Forwarded to ``self.get_users_orders`` and used in
            the cache file name.
        reconstruct: When true, ignore an existing cache file and rebuild.
        none_idx: Placeholder product id used for "no item".

    Returns:
        A DataFrame with columns ``user_id`` and ``history_items``; each
        ``history_items`` cell is a list (one entry per order, in
        ``order_number`` order) of product-id lists.
    """
    # Path is built exactly as before so existing cache files keep working.
    filepath = self.cache_dir + './item_history_' + prior_or_train + '.pkl'
    if (not reconstruct) and os.path.exists(filepath):
        # NOTE: pickle.load can execute arbitrary code; cache_dir must be trusted.
        with open(filepath, 'rb') as f:
            return pickle.load(f)

    # get_users_orders must yield user_id / order_number / product_id columns.
    up = self.get_users_orders(prior_or_train).sort_values(
        ['user_id', 'order_number', 'product_id'], ascending=True)
    # One row per (user, order) holding that order's basket as a list.
    baskets = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
    baskets = baskets.sort_values(['user_id', 'order_number'], ascending=True)

    # Single pass over the sorted rows. Plain Python lists are used on purpose:
    # pandas arithmetic treats a list operand as an array-like (elementwise),
    # so ``Series + [none_idx]`` / ``cumsum`` on list cells is not a reliable
    # way to concatenate lists.
    histories = []
    current_user = None
    purchased = None  # every item bought so far by current_user (with repeats)
    for user_id, order_number, basket in zip(
            baskets['user_id'], baskets['order_number'], baskets['product_id']):
        if not histories or user_id != current_user:
            current_user = user_id
            purchased = None
        # History of this order = unique items of all previous orders.
        if purchased is None:
            histories.append([none_idx])
        else:
            histories.append(list(set(purchased)))
        if order_number == 1:
            basket = basket + [none_idx]
        purchased = basket if purchased is None else purchased + basket

    # dtype=object keeps each history as one list-valued cell.
    baskets['product_id'] = pd.Series(histories, index=baskets.index, dtype=object)

    item_history = baskets.groupby(['user_id'])['product_id'].apply(list).reset_index()
    item_history.columns = ['user_id', 'history_items']
    with open(filepath, 'wb') as f:
        pickle.dump(item_history, f, pickle.HIGHEST_PROTOCOL)
    return item_history
# Comment list
# Table of contents