def save(self, save_dir='./'):
    """
    Write out the built corpus to a save directory.

    Every output path is formed as ``save_dir + <file name>`` (plain string
    concatenation), so when ``save_dir`` names a directory it must end with
    a path separator, e.g. ``'./'`` or ``'out/'``.

    Files written:

    - ``tag-tables.pickle``    -- pickled tuple ``(tagsToDocs, docsToTags)``
    - ``titles.pickle``        -- pickled ``self.titles``
    - ``documents.tfidf_model``-- written by ``self.tfidf_model.save``
    - ``documents_tfidf.mm``   -- ``self.corpus_tfidf`` serialized via
      ``corpora.MmCorpus.serialize``
    - ``documents.dict``       -- written by ``self.dictionary.save``
    - ``files.pickle``         -- pickled ``self.files``
    - ``doc_line_nums.pickle`` -- pickled ``self.doc_line_nums``

    :param save_dir: Prefix prepended verbatim to each output file name.
    """
    def _pickle_to(file_name, obj):
        # Open through a context manager so the handle is flushed and
        # closed as soon as the dump finishes, rather than whenever the
        # anonymous file object happens to be garbage collected.
        with open(save_dir + file_name, 'wb') as handle:
            pickle.dump(obj, handle)

    # Store the tag tables.
    _pickle_to('tag-tables.pickle', (self.tagsToDocs, self.docsToTags))

    # Store the document titles.
    _pickle_to('titles.pickle', self.titles)

    # Write out the tfidf model.
    self.tfidf_model.save(save_dir + 'documents.tfidf_model')

    # Write out the tfidf corpus.
    corpora.MmCorpus.serialize(save_dir + 'documents_tfidf.mm', self.corpus_tfidf)

    # Write out the dictionary.
    self.dictionary.save(save_dir + 'documents.dict')

    # Save the filenames.
    _pickle_to('files.pickle', self.files)

    # Save the file ID and line numbers for each document.
    _pickle_to('doc_line_nums.pickle', self.doc_line_nums)

    # Objects that are not saved:
    #  - stop_list - You don't need to filter stop words for new input
    #                text, they simply aren't found in the dictionary.
    #  - frequency - This preliminary word count object is only used for
    #                removing infrequent words. Final word counts are in
    #                the `dictionary` object.
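# Stray web-page navigation text captured along with the snippet
# (not part of the code): "Comment list" / "Table of contents"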