def dimension_compression(n_components=300):
    """Compress the token/context matrix into dense per-token vectors via PCA.

    The matrix is obtained from ``make_matrix()``; its entries are visited
    in sorted token order, the per-token context entries are vectorized with
    ``DictVectorizer``, the shape of the resulting sparse matrix is printed,
    and the densified matrix is projected with ``PCA``.

    Args:
        n_components: Number of principal components handed to ``PCA``.
            Defaults to 300, the value that used to be hard-coded.

    Returns:
        A dict mapping each token to its row of the PCA output.
    """
    # NOTE(review): presumably make_matrix() returns a mapping of
    # token -> {context: value}; confirm against its definition.
    X_t_c = make_matrix()

    # Mapping keys are unique, so sorting the keys gives the same order as
    # sorting the (token, contexts) pairs.
    token_list = sorted(X_t_c)
    contexts_list = [X_t_c[token] for token in token_list]

    vectorizer = DictVectorizer(sparse=True)
    sparse_matrix = vectorizer.fit_transform(contexts_list)
    print(sparse_matrix.shape)

    # toarray() yields a plain ndarray; todense() yields numpy.matrix, which
    # recent scikit-learn versions refuse as estimator input.
    pca = PCA(n_components=n_components)
    vec_list = pca.fit_transform(sparse_matrix.toarray())

    return dict(zip(token_list, vec_list))
# Comment list
# Table of contents