def nltk_tfidf_vectorize(corpus):
    """Lazily yield one TF-IDF mapping per document in *corpus*.

    Each document is passed through ``tokenize`` and materialized as a
    token list; the token lists are then wrapped in an NLTK
    ``TextCollection``, whose ``tf_idf`` method supplies the scores.

    Args:
        corpus: Iterable of documents, each in whatever form ``tokenize``
            accepts.

    Yields:
        dict: One mapping per document, in corpus order, from every
        distinct token of that document to
        ``texts.tf_idf(token, document_tokens)``.
    """
    # Function-local import, as in the original: NLTK is loaded only once
    # the generator actually starts running.
    from nltk.text import TextCollection

    # The whole corpus is tokenized before the first yield because the
    # collection is built from every document's token list.
    tokenized_docs = [list(tokenize(doc)) for doc in corpus]
    texts = TextCollection(tokenized_docs)

    for tokens in tokenized_docs:
        # Score each distinct token once instead of once per occurrence:
        # a repeated token would only overwrite its own key with the same
        # value. dict.fromkeys keeps first-occurrence order, so the
        # resulting dict matches a comprehension over the raw token list.
        yield {
            term: texts.tf_idf(term, tokens)
            for term in dict.fromkeys(tokens)
        }
评论列表
文章目录