def do_calc_svd(self):
    """Fit an LSI model on the TF-IDF-weighted documents and pickle it.

    The number of topics, ``self.k_value``, is one tenth of
    ``nlp_master.get_dict_len()`` (truncated to an int) and then clamped
    to the range 300..1000.  The fitted model is stored on ``self.lsi``.
    Finally ``self._user_classifier``, ``self.k_value`` and ``self.lsi``
    are pickled, in that order, as a single list to ``self.dumpfile``,
    whose name embeds today's month and day.

    Returns:
        None.
    """
    print("?????%d" % nlp_master.get_dict_len())

    # One tenth of the dictionary length, kept within 300..1000.
    tenth_of_dict = int(0.1 * nlp_master.get_dict_len())
    self.k_value = min(max(tenth_of_dict, 300), 1000)
    print("k??%d" % self.k_value)

    # Materialise the documents once; the TF-IDF model is both built from
    # them and applied to them.
    documents = list(nlp_master._id_docs.values())
    weighting = models.TfidfModel(documents)
    weighted_corpus = weighting[documents]

    # NOTE(review): the original (garbled) comment here mentioned 200-500
    # for num_topics, while the clamp above yields 300..1000 -- confirm
    # which range is intended.
    self.lsi = models.LsiModel(
        weighted_corpus,
        id2word=nlp_master.dictionary,
        num_topics=self.k_value,
        chunksize=2000,
    )

    # Persist the classifier, the topic count and the model in one file
    # named after the current month and day.
    now = datetime.date.today()
    self.dumpfile = "dumpdir/recsvd_dump.%d_%d" % (now.month, now.day)
    with open(self.dumpfile, "wb") as out:
        payload = [self._user_classifier, self.k_value, self.lsi]
        # Protocol -1 selects the highest pickle protocol available.
        pickle.dump(payload, out, -1)
# ???????NULL???
# ???????site_news?????????????
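# Comment list (web-page navigation text, not part of the program)
# Table of contents (web-page navigation text, not part of the program)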