def getTFIDF():
    """Compute the TF-IDF weight matrix for the segmented corpus.

    The corpus is obtained from ``getFenCiWords()``, converted into a
    term-count matrix with ``CountVectorizer`` and then re-weighted with
    ``TfidfTransformer``. A one-line summary with the number of rows of the
    weight matrix and the vocabulary size is printed as a side effect.

    :return: a ``(weight, textList)`` tuple. ``weight`` is the dense TF-IDF
        array, where ``weight[i][j]`` is the TF-IDF weight of vocabulary
        term ``j`` in document ``i``. ``textList`` is passed through
        unchanged from ``getFenCiWords()`` (presumably the texts matching
        the rows of ``weight`` -- TODO confirm against that helper).
    """
    corpus, textList = getFenCiWords()

    # Step 1: raw term counts; counts[i, j] is how often term j occurs in
    # document i.
    vectorizer = CountVectorizer()
    counts = vectorizer.fit_transform(corpus)

    # Step 2: turn the counts into TF-IDF weights.
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(counts)

    # Vocabulary learned by the vectorizer. get_feature_names() was removed
    # in scikit-learn 1.2, so prefer get_feature_names_out() and only fall
    # back to the old name on older releases.
    feature_names = getattr(vectorizer, "get_feature_names_out", None)
    if feature_names is None:
        feature_names = vectorizer.get_feature_names
    word = feature_names()

    # Dense copy of the sparse TF-IDF matrix; this is what callers receive.
    weight = tfidf.toarray()

    # NOTE(review): the string literals below look mis-encoded ("?" runs);
    # they are kept byte-for-byte because the original text cannot be
    # recovered from this file -- confirm the intended message.
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("?" + str(len(weight)) + "???" + ",?" + str(len(word)) + "??")

    # Debugging aid: to inspect individual weights, loop over the rows of
    # ``weight`` (one per document) and, inside that, over ``word`` (one
    # entry per column), printing word[j] next to weight[i][j].
    return weight, textList
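# NOTE: stray page text, not code (likely left over from the web page this
# snippet was copied from): "Comment list" / "Article table of contents".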