def build_dictionary(generator, min_freq=5):
    """Build a token dictionary from ``generator`` and pickle it to disk.

    Any dictionary file already present at ``DATA_PATH/DICT_NAME`` is
    deleted first, so the dictionary is always rebuilt from scratch.

    Args:
        generator: Iterable of ``(c, u)`` pairs. Each pair is joined with
            ``+`` and the result is handed to ``corpora.Dictionary`` as one
            document (presumably two token lists -- confirm against the
            caller).
        min_freq: Tokens whose document frequency is strictly below this
            value are removed from the dictionary.

    Returns:
        The ``corpora.Dictionary`` that was built and saved.
    """
    dictionary_path = os.path.join(DATA_PATH, DICT_NAME)

    # isfile() is False for missing paths, so no separate exists() check.
    if os.path.isfile(dictionary_path):
        print("Delete dictionary and rebuild")
        os.remove(dictionary_path)

    dictionary = corpora.Dictionary(c + u for c, u in generator)

    # Collect the ids of tokens whose document frequency is below min_freq.
    filter_ids = [
        tokenid
        for tokenid, docfreq in dictionary.dfs.items()
        if docfreq < min_freq
    ]
    dictionary.filter_tokens(filter_ids)
    dictionary.compactify()

    # Added after filtering, so these tokens are not subject to the
    # min_freq cut-off applied above.
    dictionary.add_documents([_START_VOCAB])

    # Use a context manager so the file is flushed and closed even if
    # pickling raises; the original left the handle open.
    with open(dictionary_path, 'wb') as dict_file:
        pickle.dump(dictionary, dict_file)

    print("SVAE dictionary to %s" % (dictionary_path))
    return dictionary
评论列表
文章目录