data.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:DREAM 作者: LaceyChen17 项目源码 文件源码
def get_baskets(self, prior_or_train, reconstruct = False, reordered = False, none_idx = 49689):
        '''
            get users' baskets
        '''
        if reordered:
            filepath = self.cache_dir + './reorder_basket_' + prior_or_train + '.pkl'
        else:
            filepath = self.cache_dir + './basket_' + prior_or_train + '.pkl'

        if (not reconstruct) and os.path.exists(filepath):
            with open(filepath, 'rb') as f:
                up_basket = pickle.load(f)
        else:          
            up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending = True)
            uid_oid = up[['user_id', 'order_number']].drop_duplicates()
            up = up[up.reordered == 1][['user_id', 'order_number', 'product_id']] if reordered else up[['user_id', 'order_number', 'product_id']]
            up_basket = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
            up_basket = pd.merge(uid_oid, up_basket, on = ['user_id', 'order_number'], how = 'left')
            for row in up_basket.loc[up_basket.product_id.isnull(), 'product_id'].index:
                up_basket.at[row, 'product_id'] = [none_idx]
            up_basket = up_basket.sort_values(['user_id', 'order_number'], ascending = True).groupby(['user_id'])['product_id'].apply(list).reset_index()
            up_basket.columns = ['user_id', 'reorder_basket'] if reordered else ['user_id', 'basket']
            #pdb.set_trace()
            with open(filepath, 'wb') as f:
                pickle.dump(up_basket, f, pickle.HIGHEST_PROTOCOL)
        return up_basket
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号