yodaqa2csv.py 文件源码-python代码片段

yodaqa2csv.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

def load_jacana(fname, regexen):
    """Load question/answer samples from a jacana-style text file.

    The file is read line by line.  A line starting with ``'<Q> '`` opens a
    new question; every other non-blank line is an answer candidate for the
    most recent question, formatted as space-separated fields::

        <label:int> <kwweight:float> <aboutkwweight:float> <answer text...>

    Args:
        fname: Path of the file to read.
        regexen: Mapping indexed by the original (untokenised) question
            text; the looked-up value is passed to ``regex_overlap``.

    Returns:
        A list of dicts, one per answer line, with keys ``qtext``,
        ``label``, ``atext``, ``kwweight``, ``aboutkwweight`` and
        ``toklabels`` (texts and token labels are space-joined strings).

    Raises:
        ValueError: If an answer line appears before any question line, or
            if the numeric fields of an answer line cannot be parsed.
        IndexError: If an answer line has fewer than three fields.
        KeyError: If the question text is missing from ``regexen``.
    """
    samples = []
    qorig = None  # untokenised text of the current question
    qtext = None  # tokenised question, space-joined once per question
    with open(fname, 'rt') as inp:
        for lineno, line in enumerate(inp, 1):
            line = line.strip()
            if not line:
                # Blank lines carry no data; previously they crashed on int('').
                continue
            if line.startswith('<Q> '):
                qorig = line[len('<Q> '):]
                qtext = ' '.join(word_tokenize(qorig))
                continue
            if qorig is None:
                raise ValueError(
                    '%s:%d: answer line found before any <Q> line'
                    % (fname, lineno))
            fields = line.split(' ')
            label = int(fields[0])
            kwweight = float(fields[1])
            aboutkwweight = float(fields[2])
            text = word_tokenize(' '.join(fields[3:]))
            toklabels = regex_overlap(text, regexen[qorig])
            samples.append({
                'qtext': qtext,
                'label': label,
                'atext': ' '.join(text),
                'kwweight': kwweight,
                'aboutkwweight': aboutkwweight,
                # 0 + tl renders a bool as 0/1 (presumably regex_overlap
                # yields per-token booleans -- confirm against its definition).
                'toklabels': ' '.join([str(0 + tl) for tl in toklabels]),
            })
    return samples