def tfidf_model(self):
    """Return the TF-IDF model for this discussion, creating it on first use.

    The model is memoized in ``self._tfidf_model``. On a cache miss it is
    loaded from ``tfidf_<discussion id>.model`` under ``self.dirname`` when
    that file exists, and (re)built from ``self.subcorpus`` and saved there
    when the file is missing or its ``num_docs`` does not match the current
    post count.

    Returns:
        The ``gmodels.TfidfModel`` instance, or ``None`` when
        ``self.post_ids_query`` counts fewer than 10 posts. The ``None``
        result is not memoized, so the count is re-evaluated on the next
        call.
    """
    # Fast path: a model was already built or loaded for this instance.
    if self._tfidf_model is not None:
        return self._tfidf_model

    n_posts = self.post_ids_query.count()
    if n_posts < 10:
        # Below the minimum corpus size no model is produced at all.
        return None

    vocabulary = self.dictionary
    model = gmodels.TfidfModel(id2word=vocabulary)
    model_path = join(
        self.dirname, "tfidf_%d.model" % (self.discussion.id,))
    corpus = self.subcorpus

    if exists(model_path):
        model = model.load(model_path)
        # Staleness heuristic: an equal document count is taken to mean the
        # saved model covers the same posts. This is known to be wrong in
        # corner cases (e.g. hidden posts).
        if model.num_docs != n_posts:
            unlink(model_path)
            model = gmodels.TfidfModel(id2word=vocabulary)

    # Reached with a mismatching num_docs when there was no usable file on
    # disk; presumably a freshly constructed model reports no documents yet
    # -- confirm against gmodels.TfidfModel.
    if model.num_docs != n_posts:
        model.initialize(corpus)
        model.save(model_path)

    self._tfidf_model = model
    return self._tfidf_model
评论列表
文章目录