def convert_graph(data_dir):
    """Convert the training graph file into a tab-separated logical-form file.

    Reads ``spades.bow.graphs.train.json`` from *data_dir* (one JSON record
    per line) and writes ``train_lf_spade`` into the same directory.  Each
    output line holds four JSON values separated by tabs:

    1. the list of words of the sentence,
    2. the record's ``answerString`` value,
    3. the "good" logical forms, as ``(lf, denotation)`` pairs whose
       denotation shares at least one element with the answer,
    4. the "bad" logical forms, i.e. the remaining ``(lf, denotation)`` pairs.

    A record is skipped when the line is not valid JSON, when it has no
    ``'entities'`` key, when its ``'graphs'`` list is empty, or when
    ``graph2lf`` yields no logical form for any of its graphs.

    Args:
        data_dir: Directory containing the input file; the output file is
            created (or overwritten) there as well.

    Raises:
        KeyError: If a parsed record lacks ``'words'``, ``'graphs'`` or
            ``'answerString'`` (these are indexed directly).
    """
    out_path = os.path.join(data_dir, 'train_lf_spade')
    # ``with`` guarantees the output file is flushed and closed, even when a
    # record raises half-way through the conversion.
    with open(out_path, 'w') as rf:
        for fname in ['spades.bow.graphs.train.json']:
            print('reading', fname)
            pname = os.path.join(data_dir, fname)
            with codecs.open(pname, 'r', 'utf-8') as f:
                for raw_line in f:
                    try:
                        record = json.loads(raw_line)
                    except ValueError:
                        # Malformed JSON line: best-effort skip.  ValueError
                        # also covers json.JSONDecodeError on Python 3.
                        continue

                    sen = [x['word'] for x in record['words']]
                    forest, answer = record['graphs'], record['answerString']
                    # ``in`` works on Python 2 and 3; dict.has_key is 2-only.
                    if 'entities' not in record:
                        continue
                    if not forest:
                        continue
                    entity_list = record['entities']

                    good_lf = []
                    bad_lf = []
                    for graph in forest:
                        lf = graph2lf(graph['graph'], entity_list)
                        if lf is None:
                            continue
                        # NOTE(review): the tree and the result of
                        # get_nt_ter() are not used here.  The calls are kept
                        # because they may validate the s-expression or have
                        # side effects -- confirm before removing them.
                        parse_tree = Tree()
                        parse_tree.construct_from_sexp(lf)
                        parse_tree.get_nt_ter()
                        denotation = graph['denotation']
                        # "Good" means the denotation overlaps the answer.
                        if set(denotation) & set(answer):
                            good_lf.append((lf, denotation))
                        else:
                            bad_lf.append((lf, denotation))

                    # Every usable logical form lands in one of the two
                    # lists, so both empty means nothing was found.
                    if not good_lf and not bad_lf:
                        continue

                    json.dump(sen, rf)
                    rf.write('\t')
                    json.dump(answer, rf)
                    rf.write('\t')
                    json.dump(good_lf, rf)
                    rf.write('\t')
                    json.dump(bad_lf, rf)
                    rf.write('\n')
# [web-page residue, not code] Comment list
# [web-page residue, not code] Table of contents