def reduce_dict(weibo_test):
    """Make sure a saved corpus dictionary exists, then run the TF-IDF step.

    If no file exists at ``path_dictionary``, a new ``corpora.Dictionary`` is
    built from every document returned by ``os_path.LoadFiles(path_doc_root)``,
    tokens seen in fewer than 5 documents are removed, and the result is saved
    to ``path_dictionary``. Finally ``svm_tfidf.reduce_tfidf`` is called.

    Args:
        weibo_test: Passed through unchanged to ``svm_tfidf.reduce_tfidf``;
            its meaning is not visible from this function.

    Returns:
        None.
    """
    dictionary = None
    if not os.path.exists(path_tmp):
        os.makedirs(path_tmp)

    # Build the dictionary only when no previously saved one is present.
    if not os.path.exists(path_dictionary):
        dictionary = corpora.Dictionary()
        # Each entry is indexed as (category, document); only the document
        # (index 1) is needed to build the dictionary.
        for msg in os_path.LoadFiles(path_doc_root):
            words = convert_doc_to_wordlist(msg[1], cut_all=False)
            dictionary.add_documents([words])

        # Drop tokens whose document frequency is below 5.
        rare_token_ids = [
            token_id
            for token_id, doc_freq in dictionary.dfs.items()
            if doc_freq < 5
        ]
        dictionary.filter_tokens(rare_token_ids)
        dictionary.compactify()
        dictionary.save(path_dictionary)

    # NOTE(review): when the dictionary file already exists, `dictionary` is
    # still None here. Presumably reduce_tfidf loads it from path_dictionary
    # itself in that case -- confirm against svm_tfidf.
    svm_tfidf.reduce_tfidf(dictionary, weibo_test)
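# (Web-page residue left over from scraping: "Comment list" / "Table of contents" -- not part of the program.)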