def load_jacana(fname, regexen):
    """Load question/answer samples from a Jacana-style text file.

    The file is read line by line.  A line starting with ``'<Q> '`` sets
    the current question; every other non-blank line is an answer
    candidate for that question, formatted as space-separated fields::

        <label:int> <kwweight:float> <aboutkwweight:float> <answer text...>

    Args:
        fname: Path of the file to read.
        regexen: Mapping indexed by the original (untokenized) question
            string; the looked-up value is passed to ``regex_overlap``
            together with the tokenized answer.

    Returns:
        A list of dicts, one per answer line, with keys ``'qtext'``,
        ``'label'``, ``'atext'``, ``'kwweight'``, ``'aboutkwweight'`` and
        ``'toklabels'``.  ``'qtext'`` and ``'atext'`` are the tokens
        joined by single spaces; ``'toklabels'`` is the result of
        ``regex_overlap`` rendered as space-separated numbers.

    Raises:
        ValueError: If an answer line appears before any ``'<Q> '`` line,
            or if a numeric field cannot be parsed.
        KeyError: If ``regexen`` has no entry for a question (assuming it
            is a plain dict).
    """
    question_prefix = '<Q> '
    samples = []
    qorig = None   # raw question text, used as the key into regexen
    qtext = None   # tokenized question re-joined with single spaces

    with open(fname, 'rt') as inp:
        for lineno, line in enumerate(inp, 1):
            line = line.strip()
            if not line:
                # Blank lines carry no data; without this guard they would
                # reach int('') below and fail with an opaque error.
                continue

            if line.startswith(question_prefix):
                qorig = line[len(question_prefix):]
                # Join once per question rather than once per answer.
                qtext = ' '.join(word_tokenize(qorig))
                continue

            if qorig is None:
                raise ValueError(
                    '%s:%d: answer line found before any %r line'
                    % (fname, lineno, question_prefix.strip()))

            fields = line.split(' ')
            label = int(fields[0])
            kwweight = float(fields[1])
            aboutkwweight = float(fields[2])
            text = word_tokenize(' '.join(fields[3:]))
            # presumably one flag per answer token -- confirm against
            # regex_overlap's definition
            toklabels = regex_overlap(text, regexen[qorig])
            samples.append({
                'qtext': qtext,
                'label': label,
                'atext': ' '.join(text),
                'kwweight': kwweight,
                'aboutkwweight': aboutkwweight,
                # 0 + tl turns booleans into 0/1 before stringifying.
                'toklabels': ' '.join([str(0 + tl) for tl in toklabels]),
            })
    return samples
评论列表
文章目录