def transform(self):
    """Return the cosine similarity of each obs/target pair in SVD space.

    Steps, in order:

    1. Fit a word n-gram vectorizer (built by
       ``self._init_word_ngram_tfidf``; presumably a TF-IDF vectorizer --
       confirm against that helper) on ``obs_corpus`` followed by
       ``target_corpus`` to obtain one shared vocabulary.
    2. Build a fresh vectorizer per corpus with that vocabulary and
       ``fit_transform`` each corpus on its own. Only the vocabulary is
       shared; each vectorizer is re-fitted on its own corpus.
    3. Fit a ``TruncatedSVD`` on the two matrices stacked vertically and
       project each matrix with it.
    4. Apply ``dist_utils._cosine_sim`` to the rows of both projections,
       paired in order.

    Returns:
        ``np.asarray`` of the per-pair results with length-1 axes removed
        by ``squeeze()``. NOTE(review): with a single pair this collapses
        to a 0-d array -- confirm callers expect that.
    """
    # Shared vocabulary learned from both corpora together.
    combined_corpus = [*self.obs_corpus, *self.target_corpus]
    vocab_vectorizer = self._init_word_ngram_tfidf(self.ngram)
    vocab_vectorizer.fit(combined_corpus)
    shared_vocab = vocab_vectorizer.vocabulary_

    # One vectorizer per corpus, both restricted to the shared vocabulary
    # so the two matrices have identical columns.
    obs_vectorizer = self._init_word_ngram_tfidf(self.ngram, shared_vocab)
    obs_matrix = obs_vectorizer.fit_transform(self.obs_corpus)

    target_vectorizer = self._init_word_ngram_tfidf(self.ngram, shared_vocab)
    target_matrix = target_vectorizer.fit_transform(self.target_corpus)

    # A single SVD fitted on both matrices keeps the projections in the
    # same latent space.
    reducer = TruncatedSVD(
        n_components=self.svd_dim,
        n_iter=self.svd_n_iter,
        random_state=config.RANDOM_SEED,
    )
    reducer.fit(scipy.sparse.vstack([obs_matrix, target_matrix]))
    obs_reduced = reducer.transform(obs_matrix)
    target_reduced = reducer.transform(target_matrix)

    # Row-wise similarity: i-th obs row against i-th target row.
    pair_scores = [
        dist_utils._cosine_sim(obs_row, target_row)
        for obs_row, target_row in zip(obs_reduced, target_reduced)
    ]
    return np.asarray(pair_scores).squeeze()
feature_vector_space.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录