def get_term_topic(self, X):
    """Return a min-max scaled topic weight for every vocabulary term.

    Reads the tab-separated file ``word_topic.txt`` from the current working
    directory, using the first column of each line as the term and the third
    column as its numeric topic value. Terms in ``self.vocabulary_`` that do
    not appear in the file keep a weight of 0 and are printed to stdout.

    Parameters
    ----------
    X : object
        Not used by the computation; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(self.vocabulary_)``. Position
        ``self.vocabulary_[term]`` holds the weight of ``term`` after
        rescaling with ``MinMaxScaler`` (default feature range).

    Raises
    ------
    IndexError
        If a non-blank line of the file has fewer than three columns.
    ValueError
        If the third column of a matched term is not a number.
    """
    # Local import: io.open decodes UTF-8 identically on Python 2 and 3,
    # replacing the Python-2-only ``line.decode('utf-8')``.
    import io

    # NOTE(review): assumes vocabulary_ maps term -> index in
    # [0, len(vocabulary_)) -- confirm against the vectorizer that sets it.
    vocabulary = self.vocabulary_

    word2topic = {}
    with io.open('word_topic.txt', 'r', encoding='utf-8') as f:
        for line in f:
            record = line.rstrip('\r\n')
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = record.split('\t')
            word2topic[fields[0]] = fields[2]

    topic = np.zeros((len(vocabulary),))
    for word in vocabulary:
        if word in word2topic:
            topic[vocabulary[word]] = float(word2topic[word])
        else:
            # Report terms that have no topic value; they stay at 0.
            print(word)

    if topic.size == 0:
        # Nothing to scale; the scaler cannot be fitted on zero samples.
        return topic

    # MinMaxScaler expects a 2-D (n_samples, n_features) array: scale the
    # weights as a single column, then flatten back to the 1-D vector.
    scaled = preprocessing.MinMaxScaler().fit_transform(topic.reshape(-1, 1))
    return scaled.ravel()
# [page navigation residue] Comment list
# [page navigation residue] Table of contents