def convert_graph(data_dir):
    """Convert the graph files in *data_dir* into a logical-form file.

    Reads ``train.graph`` and ``valid.graph`` (UTF-8, one JSON object per
    line) from *data_dir* and writes ``train_lf`` into the same directory.
    Every input record yields one output line holding four tab-separated
    JSON values:

    1. the sentence split on single spaces,
    2. the ``answerF1`` value of the record,
    3. the "good" ``[logical_form, denotation]`` pairs, i.e. those whose
       denotation shares at least one element with the answer,
    4. the remaining ("bad") ``[logical_form, denotation]`` pairs.

    Args:
        data_dir: Directory containing the ``*.graph`` input files; the
            output file is created (or overwritten) there as well.
    """
    out_path = os.path.join(data_dir, 'train_lf')
    # The context manager guarantees the output is flushed and closed even
    # if reading or converting one of the inputs raises.
    with open(out_path, 'w') as rf:
        for fname in ('train.graph', 'valid.graph'):
            print('reading', fname)
            in_path = os.path.join(data_dir, fname)
            with codecs.open(in_path, 'r', 'utf-8') as f:
                for raw_line in f:
                    record = json.loads(raw_line)
                    tokens = record['sentence'].split(' ')
                    forest, answer = record['forest'], record['answerF1']
                    # Built once per record instead of once per graph.
                    answer_set = set(answer)
                    good_lf = []
                    bad_lf = []
                    for choice in forest:
                        entity_list = choice['entities']
                        for graph in choice['graphs']:
                            lf = graph2lf(graph['graph'], entity_list)
                            # The parse result is not used below; the calls
                            # are kept because they presumably act as a
                            # well-formedness check on the logical form
                            # (TODO confirm against Tree's implementation).
                            parse_tree = Tree()
                            parse_tree.construct_from_sexp(lf)
                            parse_tree.get_nt_ter()
                            denotation = graph['denotation']
                            if set(denotation) & answer_set:
                                good_lf.append((lf, denotation))
                            else:
                                bad_lf.append((lf, denotation))
                    fields = (tokens, answer, good_lf, bad_lf)
                    rf.write('\t'.join(json.dumps(field) for field in fields))
                    rf.write('\n')
评论列表
文章目录