def calculateWordsIDF(texts):
    """Compute the inverse document frequency (IDF) of every word in ``texts``.

    For each word listed in any ``text.word_frequency``, count the documents
    whose ``register_pass_centences`` contain the word (as decided by
    ``isSentencesContainsWord``) and store ``log10(N / count)``, where ``N``
    is ``len(texts)``.

    Args:
        texts: Sized, re-iterable collection of document objects. Each one
            must expose ``word_frequency`` (an object with ``.items()``
            yielding ``(word, frequency)`` pairs) and
            ``register_pass_centences``.

    Returns:
        dict mapping each word to its IDF as a float. Empty when ``texts``
        is empty or no document lists any word.
    """
    all_documents_count = len(texts)
    idf_data = dict()
    for text in texts:
        for word, _ in text.word_frequency.items():
            # The IDF depends only on the word and the corpus, so a word
            # already handled through an earlier text needs no second scan.
            # NOTE(review): assumes isSentencesContainsWord is deterministic.
            if word in idf_data:
                continue
            word_doc_freq = sum(
                1
                for doc in texts
                if isSentencesContainsWord(doc.register_pass_centences, word)
            )
            # The word comes from a document's own frequency table, so it is
            # counted as occurring in at least one document. This also avoids
            # a ZeroDivisionError when the sentence-level check matches nothing.
            word_doc_freq = max(word_doc_freq, 1)
            # float() keeps this a true division on every Python version.
            idf_data[word] = math.log10(
                float(all_documents_count) / word_doc_freq
            )
    return idf_data
# Compute TF*IDF for every word of every text and store it in text.words_tf_idf[word]
评论列表
文章目录