def _feature_names(vectorizer):
    """Return the fitted vocabulary of *vectorizer* as a plain list of terms."""
    # scikit-learn 1.0 introduced get_feature_names_out() and 1.2 removed the
    # older get_feature_names(); support both so the caller works on any version.
    getter = getattr(vectorizer, "get_feature_names_out", None)
    if getter is None:
        getter = vectorizer.get_feature_names
    # get_feature_names_out() yields an ndarray; keep the historical list type.
    return list(getter())


def texts_tfidf(ids, important_texts, citations_texts):
    """Fit TF-IDF models on two text collections and return terms and matrices.

    The same vectorizer configuration (ASCII accent stripping, English stop
    words, unigrams and bigrams, ``min_df=2``) is fitted independently on
    each collection. No similarity is computed here.

    NOTE: because each collection is fitted separately, ``freqs1`` and
    ``freqs2`` have different vocabularies; their columns are described by
    ``terms1`` and ``terms2`` respectively and are not directly comparable.

    Args:
        ids: Unused by this function; kept so existing callers keep working.
        important_texts: Iterable of documents for the first collection.
        citations_texts: Iterable of documents for the second collection.

    Returns:
        A tuple ``(terms1, terms2, freqs1, freqs2)`` where ``terms*`` are lists
        of vocabulary terms and ``freqs*`` are the matrices returned by
        ``fit_transform`` for the matching collection.
    """
    tfidf = TfidfVectorizer(
        strip_accents='ascii',
        stop_words='english',
        ngram_range=(1, 2),
        min_df=2,
    )

    freqs1 = tfidf.fit_transform(important_texts)
    # Capture the vocabulary now: the next fit_transform() refits the same
    # vectorizer and replaces it.
    terms1 = _feature_names(tfidf)

    freqs2 = tfidf.fit_transform(citations_texts)
    terms2 = _feature_names(tfidf)

    return terms1, terms2, freqs1, freqs2
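# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)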