def bm25_weight(X, K1=100, B=0.8):
""" Weighs each row of a sparse matrix X by BM25 weighting """
# calculate idf per term (user)
X = coo_matrix(X)
N = float(X.shape[0])
idf = log(N / (1 + bincount(X.col)))
# calculate length_norm per document (artist)
row_sums = numpy.ravel(X.sum(axis=1))
average_length = row_sums.mean()
length_norm = (1.0 - B) + B * row_sums / average_length
# weight matrix rows by bm25
X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
return X
评论列表
文章目录