wer.py 文件源码-python代码片段

def main():
    """Scrape the wer_are_we README and regenerate ``../data/wer.py``.

    The README markdown is downloaded, rendered to HTML with ``mistune`` and
    parsed with ``BeautifulSoup``.  Every ``<table>`` is paired positionally
    with an ``<h3>`` heading (via ``zip``, so unmatched trailing tables or
    headings are ignored).  For each pair:

    * ``get_metrics`` is called with the heading text, the table's header
      cells minus the last three, and the output text built so far.  Its
      result is used as ``(metric_names, updated_output_text)``.
    * every row containing ``<td>`` cells is passed to ``add_measures``,
      whose result is unpacked as ``(measure_data, targets, target_source)``.
      Rows without targets contribute their measure lines; a row with
      targets but no measures causes ``get_metrics`` to be re-run with those
      targets so the metric definition includes them.
    * the collected measure lines are sorted and appended to the output.

    Finally a ``wer_metrics=[...]`` line listing all metric names is appended
    and the text is written to ``../data/wer.py`` relative to this file.

    NOTE(review): this function relies on ``urllib2`` and writes a text
    payload to a file opened in ``'wb'`` mode, so it targets Python 2.
    """
    readme_url = 'https://raw.githubusercontent.com/syhw/wer_are_we/master/README.md'

    # Close the HTTP response deterministically instead of leaving the
    # underlying socket open until garbage collection.
    response = urllib2.urlopen(readme_url)
    try:
        md = response.read()
    finally:
        response.close()

    bs = BeautifulSoup(mistune.markdown(md))
    wer_data_file = os.path.abspath(os.path.join(os.path.dirname(__file__),  "../data/wer.py"))

    # Fixed preamble of the generated module.
    file_output = (
        "# The file was autogenerated by ../scrapers/wer.py\n\n"
        "from datetime import date\n\n"
        "from data.acoustics import speech_recognition, swb_hub_500\n"
        "from scales import *\n\n"
    )
    wer_metrics = []

    for table, header in zip(bs.findAll('table'), bs.findAll('h3')):
        header = header.getText()
        rows = table.findAll('tr')

        # First pass: metric definition without targets.  ``file_output`` is
        # deliberately NOT updated yet, so that a later re-run with targets
        # (below) starts from the same pre-table text rather than appending
        # a second definition.
        metric_data = get_metrics(header, rows[0].findAll('th')[:-3], file_output)
        metric_names = metric_data[0]
        wer_metrics += metric_names

        table_data = []
        for row in rows:
            # Header-only rows have no <td> cells and carry no measurements.
            if not row.findAll('td'):
                continue
            measure_data, targets, target_source = add_measures(metric_names, row)
            if not targets:
                table_data += measure_data
            elif not measure_data:
                # Row supplies targets only: rebuild the metric definition
                # with them, again starting from the pre-table output text.
                metric_data = get_metrics(
                    header,
                    rows[0].findAll('th')[:-3],
                    file_output,
                    targets=targets,
                    target_source=target_source,
                )

        # Commit the (possibly rebuilt) metric definition, then the sorted
        # measure lines for this table.
        file_output = metric_data[1]
        file_output += "".join(sorted(table_data))

    file_output = file_output + "\n\nwer_metrics=[" + ", ".join(wer_metrics) + "]"

    with open(wer_data_file, 'wb') as f:
        f.write(file_output)