def check_postag(config):
    """Print the Stanford POS tags of every source sequence in the testing datasets.

    For each dataset named in ``config['testing_datasets']`` the function walks
    over its ``'source'`` sequences, converts each one with
    ``keyphrase_utils.cut_zero`` and prints both the converted sequence and the
    result of ``StanfordPOSTagger.tag`` on it.

    Args:
        config: Mapping-like configuration object.  The keys ``'dataset'`` and
            ``'testing_datasets'`` are read here; the whole object is also
            forwarded to ``load_additional_testing_data``.

    Returns:
        None.  All output goes to stdout.
    """
    # Only the vocabulary mappings are needed; the serialized train/validation/
    # test splits are ignored because the test data is loaded separately below.
    _, _, _, idx2word, word2idx = deserialize_from_file(config['dataset'])

    # Locate the tagger distribution relative to this module instead of relying
    # on a path that exists on a single developer machine.
    # NOTE(review): the slice walks up from this file's directory using a fixed
    # 10-character margin -- presumably tuned to the project's directory name;
    # confirm it still resolves to the directory holding 'stanford-postagger/'.
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')

    # One tagger serves every dataset: neither the model nor the jar depends on
    # the dataset being processed.
    pos_tagger = StanfordPOSTagger(model, jar)

    # Add the other jars from the Stanford directory to the tagger's classpath.
    # Done once up front (the jar list never changes between samples) and joined
    # with the platform's path separator rather than a hard-coded ':'.
    stanford_jars = find_jars_within_path(os.path.dirname(jar))
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)

    # The call does not depend on the dataset currently being iterated, so load
    # all testing datasets a single time instead of once per dataset.
    test_sets = load_additional_testing_data(
        config['testing_datasets'], idx2word, word2idx, config)

    for dataset_name in config['testing_datasets']:
        test_set = test_sets[dataset_name]

        # Sources are still paired with targets so that, exactly as before, only
        # samples having both are visited; the target itself is not used.
        for test_s_o, _ in zip(test_set['source'], test_set['target']):
            # presumably cut_zero strips padding and maps indices to words via
            # idx2word -- verify against keyphrase_utils.
            source = keyphrase_utils.cut_zero(test_s_o, idx2word)
            print(source)

            text = pos_tagger.tag(source)
            print(text)
keyphrase_test_dataset.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录