def extract_instances(self, train_instances):
    """Build one kernel feature per instance from Doc2Vec document vectors.

    Every instance contributes two tagged documents, ``sa_<idx>`` and
    ``sb_<idx>``, built from the two word lists returned by
    ``get_word(type='lemma', lower=True)``.  A Doc2Vec model is trained on
    all of these documents, and the two learned document vectors of each
    instance are handed to ``vk.get_all_kernel`` to produce its feature.

    Args:
        train_instances: Sized sequence of objects exposing
            ``get_word(type='lemma', lower=True)``, which must return a
            pair ``(sa, sb)`` usable as ``TaggedDocument`` words.

    Returns:
        A ``(features, infos)`` tuple of two lists with one entry per
        instance.  ``features[i]`` is the first value returned by
        ``vk.get_all_kernel`` for instance ``i``; ``infos[i]`` is always an
        empty list.  Both lists are empty when ``train_instances`` is empty.
    """
    # Nothing to train on and nothing to extract: return empty results
    # instead of handing an empty corpus to Doc2Vec.
    if len(train_instances) == 0:
        return [], []

    sentences = []
    for idx, train_instance in enumerate(train_instances):
        sa, sb = train_instance.get_word(type='lemma', lower=True)
        sentences.append(TaggedDocument(words=sa, tags=['sa_%d' % idx]))
        sentences.append(TaggedDocument(words=sb, tags=['sb_%d' % idx]))

    # NOTE(review): `size`, `iter` and `docvecs` are the pre-4.0 gensim
    # spellings (4.x renamed them to `vector_size`, `epochs` and `dv`);
    # confirm the pinned gensim version before touching these names.
    model = Doc2Vec(sentences, size=25, window=3, min_count=0, workers=10, iter=1000)

    features = []
    infos = []
    for idx in range(len(train_instances)):
        vec_a = model.docvecs['sa_%d' % idx]
        vec_b = model.docvecs['sb_%d' % idx]
        # The second return value of get_all_kernel is not used here.
        feature, _info = vk.get_all_kernel(vec_a, vec_b)
        features.append(feature)
        # One empty info entry per instance; the vectors are not recorded.
        infos.append([])
    return features, infos
# def load_instances(self, train_instances):
# """
# Extract the cosine distance from an already-trained feature file
# without modifying the feature_file.
# This function's priority is higher than that of the above extract_instances.
# """
#
# _features, _n_dim, _n_instance = Feature.load_feature_from_file(self.feature_file)
# features = []
# infos = []
# ''' get features from train instances'''
# for _feature in _features:
# feature = Feature._feat_string_to_list(_feature, _n_dim)
# features.append([feature[1]])
# infos.append(['cosine'])
#
# features = [ Feature._feat_list_to_string(feature) for feature in features ]
#
# return features, 1, _n_instance
features_nn.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录