def _l2_normalize_rows(mat):
    """Scale every row of a 2-D float array to unit L2 norm (zero rows stay zero)."""
    norms = np.sqrt(np.sum(mat ** 2, axis=1, keepdims=True))
    # Avoid 0/0: an all-zero row is divided by 1 and therefore stays all-zero.
    norms[norms == 0] = 1.0
    return mat / norms


def make_tfidf(arr):
    """Convert a 2-D matrix of counts into an L2-normalised TF-IDF matrix.

    Steps:
      1. tf:  every row is scaled to unit L2 norm.
      2. idf: per column, ``log((N + 1) / (1 + df)) + 1`` where ``N`` is the
         number of rows and ``df`` the number of rows whose entry in that
         column is strictly positive.
      3. ``tf * idf`` is computed and every row is L2-normalised again.

    Parameters
    ----------
    arr : 2-D array-like of numbers
        Count matrix; per the original description, flavor counts for each
        recipe and compound (presumably rows are recipes and columns are
        compounds -- confirm against the caller). The input is not modified.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``arr``. Rows that are all zero
        in the input are all zero in the output.

    Side effects
    ------------
    Prints the shape of the result to stdout.
    """
    # Work in floating point so squaring small integer dtypes cannot overflow.
    counts = np.asarray(arr, dtype=float)
    n_rows = counts.shape[0]

    tf = _l2_normalize_rows(counts)

    # Document frequency: number of rows with a positive entry in each column.
    doc_freq = np.sum(tf > 0, axis=0)
    # Smoothed idf: the +1 terms inside the log keep the ratio finite for
    # columns that never occur; the trailing +1 keeps every weight >= 1.
    idf = np.log(float(n_rows + 1) / (1.0 + doc_freq)) + 1.0

    tfidf = _l2_normalize_rows(tf * idf)
    # Function-call form prints identically on Python 2 and Python 3.
    print(tfidf.shape)
    return tfidf
# (Web-page navigation residue, not code: "Comment list" / "Table of contents".)