evaluation.py 文件源码

python
阅读 40 收藏 0 点赞 0 评论 0

项目:seq2seq 作者: eske 项目源码 文件源码
def corpus_ter(hypotheses, references, case_sensitive=True, tercom_path=None, **kwargs):
    """
    Compute a corpus-level TER score by running the external tercom Java tool.

    Hypotheses and references are paired positionally with ``zip`` (so extra
    items in the longer sequence are ignored), written to two temporary files
    and passed to ``java -jar <tercom_path>``. The score is read from the
    ``Total TER:`` line of the tool's standard output.

    :param hypotheses: iterable of hypothesis sentences (one string each)
    :param references: iterable of reference sentences, aligned with `hypotheses`
    :param case_sensitive: when true, the ``-s`` flag is passed to tercom
    :param tercom_path: path to the tercom jar; defaults to 'scripts/tercom.jar'
    :param kwargs: accepted and ignored
    :return: tuple ``(score, '')`` where `score` is the reported total TER
        multiplied by 100; the second element is always an empty string
    :raises OSError: if the ``java`` executable cannot be started
    :raises subprocess.CalledProcessError: if tercom exits with a non-zero status
    :raises ValueError: if the output contains no parsable ``Total TER:`` value
    """
    tercom_path = tercom_path or 'scripts/tercom.jar'

    with tempfile.NamedTemporaryFile('w') as hypothesis_file, \
            tempfile.NamedTemporaryFile('w') as reference_file:
        # Each line carries the sentence followed by its index in parentheses,
        # which is what lets the tool match hypothesis i with reference i.
        for i, (hypothesis, reference) in enumerate(zip(hypotheses, references)):
            hypothesis_file.write('{} ({})\n'.format(hypothesis, i))
            reference_file.write('{} ({})\n'.format(reference, i))
        # The files are read by another process through their names, so the
        # buffered content has to reach the disk before tercom is started.
        hypothesis_file.flush()
        reference_file.flush()

        cmd = ['java', '-jar', tercom_path, '-h', hypothesis_file.name, '-r', reference_file.name]
        if case_sensitive:
            cmd.append('-s')

        # Must run while the temporary files still exist (they are deleted on
        # leaving the `with` block).
        output = subprocess.check_output(cmd).decode()

    # First occurrence of the summary line; fail with an explicit message
    # instead of an opaque IndexError when the tool printed something else.
    match = re.search(r'Total TER: (.*?) ', output)
    if match is None:
        raise ValueError(
            "could not find a 'Total TER:' score in tercom output: {!r}".format(output[-500:])
        )
    return float(match.group(1)) * 100, ''
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号