def calcuate_similarity(pivot_table, user_data, product_data, i, j, w_lambda):
    """Return a weighted similarity score between entries ``i`` and ``j``.

    The score sums, over every position where both ``pivot_table[i]`` and
    ``pivot_table[j]`` are non-zero, the value

        sqrt(w_lambda * (1 / reputation)**2
             + (1 - w_lambda) * (1 / frequency)**2)

    and divides that sum by the product of the two ``user_data`` column-1
    values, each clamped to a minimum of 1.

    Args:
        pivot_table: Container indexed by ``i`` and ``j`` whose entries
            support element-wise multiplication and ``.nonzero()``.
            Presumably a 2-D user x product rating array -- TODO confirm.
        user_data: DataFrame; positional column 1 of rows ``i`` and ``j``
            supplies the normalising denominator.
        product_data: DataFrame with a ``'count'`` column. Positional
            column 0 is divided by 5 to form ``reputation`` and positional
            column 2 is divided by the maximum of ``'count'`` to form
            ``frequency`` (presumably column 2 is ``'count'`` itself --
            verify against the caller).
        i: Index of the first entry.
        j: Index of the second entry.
        w_lambda: Weight of the reputation term; the frequency term gets
            ``1 - w_lambda``.

    Returns:
        ``0`` when ``i == j``; otherwise the normalised score described
        above (``0.0`` when there are no common non-zero positions).
    """
    if i == j:
        return 0

    normalize_freq = np.max(product_data['count'].values)

    # Positions at which both entries hold a non-zero value.
    common = (pivot_table[i] * pivot_table[j]).nonzero()[0]

    # NOTE(review): the earlier code also computed the product of the two
    # rating vectors after subtracting user_data column 0 from each, but
    # never used it in the result. That dead computation is dropped here;
    # confirm whether it was meant to weight the sum below.

    # `.values` replaces `DataFrame.as_matrix()`, which no longer exists in
    # pandas >= 1.0, and keeps both terms as plain NumPy arrays.
    reputation = product_data.iloc[common, 0].values / 5
    frequency = product_data.iloc[common, 2].values / normalize_freq

    val = np.sum(np.sqrt(
        w_lambda * np.square(np.reciprocal(reputation))
        + (1 - w_lambda) * np.square(np.reciprocal(frequency))
    ))

    # Clamp each factor to at least 1 so a small or zero value cannot
    # inflate the score.
    denominator = max(user_data.iloc[i, 1], 1) * max(user_data.iloc[j, 1], 1)
    return val / denominator
#create pandas dataframe
# df = pd.read_csv('dataset/ratings_Electronics_compressed.csv',
# header=None,
# names=['reviewerID', 'productID', 'overall', 'unixReviewTime'],
# sep=',',
# dtype={'reviewerID':int, 'productID':int, 'overall':int, 'unixReviewTime':int})
评论列表
文章目录