def get_postag_with_index(sources, idx2word, word2idx):
    """POS-tag every entry of ``sources`` with the Stanford POS tagger.

    Each entry is first passed through ``keyphrase_utils.cut_zero`` together
    with ``idx2word`` and the result is handed to ``StanfordPOSTagger.tag``.
    NOTE(review): ``cut_zero`` presumably strips zero padding and maps word
    indices back to tokens -- confirm against ``keyphrase_utils``.

    Args:
        sources: Sequence of source items; each one is forwarded to
            ``keyphrase_utils.cut_zero``.
        idx2word: Lookup forwarded to ``keyphrase_utils.cut_zero``.
        word2idx: Unused here; kept so existing callers keep working.

    Returns:
        A list with one element per entry of ``sources``, each being whatever
        ``pos_tagger.tag`` returned for that entry.
    """
    # Locate the 'stanford-postagger/' directory: cut this file's directory
    # right after the last separator found before its final 10 characters,
    # then append the tagger folder name.
    # NOTE(review): the magic 10 presumably skips the current directory's
    # name to reach its parent -- this depends on the project layout; confirm.
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path) - 10) + 1] + 'stanford-postagger/'
    print(path)

    # os.path.join avoids the doubled separator that plain '+' concatenation
    # produced (``path`` already ends with '/').
    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')
    pos_tagger = StanfordPOSTagger(model, jar)

    # Put every jar found next to the main jar on the Java classpath. The
    # entries must be separated by the platform's path-list separator
    # (':' on POSIX, ';' on Windows), hence os.pathsep rather than ':'.
    stanford_jars = find_jars_within_path(os.path.dirname(jar))
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)

    tagged_source = []
    total = len(sources)
    # Tag each source in order, logging progress as we go.
    for idx, source in enumerate(sources):
        source_text = keyphrase_utils.cut_zero(source, idx2word)
        text = pos_tagger.tag(source_text)
        print('[%d/%d] : %s' % (idx, total, str(text)))
        tagged_source.append(text)
    return tagged_source
keyphrase_test_dataset.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录