def generateSimMatrix(phraseList):
    """Return the pairwise TF-IDF cosine-similarity matrix for the phrases.

    Each phrase is stripped of surrounding whitespace. A phrase that is empty
    after stripping is replaced by a single space so that it still occupies
    its own row and column in the result, keeping indices aligned with
    ``phraseList``.

    Args:
        phraseList: Iterable of strings, one document per element.

    Returns:
        The value of ``cosine_similarity`` applied to the TF-IDF matrix
        against itself; entry ``[i][j]`` compares phrase ``i`` with
        phrase ``j``.

    Note:
        NOTE(review): if no phrase yields a token (e.g. an empty list or
        only blank phrases), the vectorizer is expected to raise rather than
        return an empty matrix -- confirm against the scikit-learn version
        in use.
    """
    # One entry per input phrase; blank phrases become ' ' as placeholders.
    documents = [phrase.strip() or ' ' for phrase in phraseList]

    # min_df=1 keeps every term that occurs in at least one document, which is
    # the same vocabulary the former min_df=0 selected. An integer 0 is
    # rejected by parameter validation in newer scikit-learn releases.
    tfidf_vectorizer = TfidfVectorizer(min_df=1, stop_words=None)

    # Finds the tf-idf score with normalization.
    tfidf_matrix_train = tfidf_vectorizer.fit_transform(documents)

    return cosine_similarity(tfidf_matrix_train, tfidf_matrix_train)
评论列表
文章目录