def dim_reduction(dim=300):
    """Build dense word vectors from the table returned by ``getPPMI()``.

    Each token's contexts are vectorized into one row of a sparse
    token-by-context matrix, the matrix is factorized with a truncated SVD,
    and every token is mapped to its row of ``U * S`` (left singular vectors
    scaled by the singular values).

    NOTE(review): assumes ``getPPMI()`` returns a mapping of
    ``token -> contexts`` where ``contexts`` is a dict or an iterable of
    ``(context, weight)`` pairs (anything ``dict()`` accepts) -- confirm
    against ``getPPMI``.

    Args:
        dim: Requested number of singular components (vector length).
            Defaults to 300, the value that used to be hard-coded. If the
            matrix is too small for that many components, the largest
            feasible number is used instead.

    Returns:
        A dict mapping each token to a 1-D NumPy array of length
        ``min(dim, min(n_tokens, n_contexts) - 1)``. Empty if ``getPPMI()``
        yields no tokens.

    Raises:
        ValueError: If ``dim`` is smaller than 1, or if the matrix has fewer
            than two rows or columns so no truncated SVD can be computed.
    """
    if dim < 1:
        raise ValueError("dim must be >= 1, got %r" % (dim,))

    ppmi = getPPMI()

    token_list = []
    context_rows = []
    # Sorted iteration keeps the row order (and so the output) deterministic.
    for token, contexts in sorted(ppmi.items()):
        token_list.append(token)
        context_rows.append(dict(contexts))

    # Nothing to factorize: return an empty mapping instead of failing
    # inside the vectorizer / SVD.
    if not token_list:
        return {}

    dic2vec = DictVectorizer(sparse=True)
    tc_vec = dic2vec.fit_transform(context_rows)

    # svds with its default solver requires 1 <= k < min(tc_vec.shape);
    # a fixed k=300 raised ValueError on small vocabularies.
    max_k = min(tc_vec.shape) - 1
    if max_k < 1:
        raise ValueError(
            "token-context matrix of shape %r is too small for a truncated SVD"
            % (tc_vec.shape,)
        )
    k = min(dim, max_k)

    left_vectors, singular_values, _ = svds(tc_vec, k)
    # Column-wise scaling; same result as np.dot(U, np.diag(S)) without
    # materializing the k x k diagonal matrix.
    tc_pca = left_vectors * singular_values

    return dict(zip(token_list, tc_pca))
# Web page residue (not part of the code): "Comment list" / "Table of contents"