def reduce_result(dictionary, lsi_model, predictor, weibo_test):
    """Classify one piece of text and print the predicted category.

    The text is segmented with jieba, turned into a bag-of-words vector with
    ``dictionary``, weighted by a TF-IDF model built from that dictionary,
    projected through ``lsi_model`` and finally handed to
    ``predictor.predict``.  The predicted index is mapped to a category name
    taken from the file names found in ``path_tmp_lsi``.

    Args:
        dictionary: Object providing ``doc2bow``.  When falsy, it is loaded
            from ``path_dictionary`` via ``corpora.Dictionary.load``.
        lsi_model: Model indexed with a TF-IDF vector.  When falsy, it is
            unpickled from ``path_tmp_lsimodel``.
        predictor: Object providing ``predict``.  When falsy, it is unpickled
            from ``path_tmp_predictor``.
        weibo_test: Raw text to classify.

    Returns:
        None.  The input text and the predicted category are printed.
    """
    # Lazily load any component the caller did not supply.
    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # paths must only ever point at trusted, locally produced model files.
    if not dictionary:
        dictionary = corpora.Dictionary.load(path_dictionary)
    if not lsi_model:
        with open(path_tmp_lsimodel, 'rb') as lsi_file:
            lsi_model = pkl.load(lsi_file)
    if not predictor:
        with open(path_tmp_predictor, 'rb') as predictor_file:
            predictor = pkl.load(predictor_file)

    # Category names are the part of each file name before the first dot,
    # de-duplicated while keeping first-seen order.
    # NOTE(review): os.listdir order is arbitrary; the index -> name mapping
    # presumably has to match the order used when the predictor was trained
    # -- confirm against the training code.
    catg_list = []
    seen = set()
    for file_name in os.listdir(path_tmp_lsi):
        catg = file_name.split('.')[0]
        if catg not in seen:
            seen.add(catg)
            catg_list.append(catg)

    demo_doc = weibo_test
    print(demo_doc)
    tokens = list(jieba.cut(demo_doc, cut_all=False))
    demo_bow = dictionary.doc2bow(tokens)
    tfidf_model = models.TfidfModel(dictionary=dictionary)
    demo_tfidf = tfidf_model[demo_bow]
    demo_lsi = list(lsi_model[demo_tfidf])

    # Build a single-row matrix from the (topic_id, weight) pairs.
    cols = [item[0] for item in demo_lsi]
    data = [item[1] for item in demo_lsi]
    rows = [0] * len(demo_lsi)

    # Pin the width to the model's topic count when it is available.  Without
    # an explicit shape the width is inferred from the largest topic id
    # present, so a projection that omits trailing topics (or is empty) would
    # yield a row of the wrong width or fail outright.
    num_topics = getattr(lsi_model, 'num_topics', None)
    shape = (1, num_topics) if num_topics else None
    demo_matrix = csr_matrix((data, (rows, cols)), shape=shape).toarray()

    prediction = predictor.predict(demo_matrix)
    print('??????{x}'.format(x=catg_list[prediction[0]]))
# 评论列表 (comment list)
# 文章目录 (table of contents)