def _keep_frequent(data, column, min_count):
    '''Return the rows of data whose value in column occurs in at least min_count rows.'''
    counts = data.groupby(column).size()
    frequent_values = counts[counts >= min_count].index
    # .values turns the mask into a plain positional boolean array, so the
    # selection does not depend on index alignment (e.g. duplicate labels).
    return data[data[column].isin(frequent_values).values]


def remove_rare_elements(data, min_user_activity, min_item_popularity):
    '''Remove users and items that appear in too few interactions.

    data is a pandas DataFrame with one row per interaction, a user column
    named 'u' and an item column named 'i'.
    min_user_activity is the minimum number of interactions that a user should have.
    min_item_popularity is the minimum number of interactions that an item should have.

    Returns a filtered DataFrame; the input DataFrame is not modified.

    NB: the constraint on items might not be strictly satisfied, because rare
    users and rare items are removed alternately and the last removal of
    inactive users might create new rare items.
    '''
    print('Remove inactive users and rare items...')
    # Remove inactive users a first time.
    data = _keep_frequent(data, 'u', min_user_activity)
    # Remove unpopular items.
    data = _keep_frequent(data, 'i', min_item_popularity)
    # Remove users that might have dropped below the activity threshold
    # because some of their interactions were with items removed above.
    data = _keep_frequent(data, 'u', min_user_activity)
    return data
preprocess.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录