def main():
    """Regenerate ``../data/wer.py`` from the wer_are_we README.

    The README markdown is downloaded, rendered to HTML with ``mistune`` and
    parsed with ``BeautifulSoup``.  Each ``<table>`` is paired, by position,
    with an ``<h3>`` heading.  ``get_metrics`` and ``add_measures`` (both
    defined elsewhere in this module) turn every pair into generated source
    text.  That text, followed by a ``wer_metrics=[...]`` line, is written to
    ``../data/wer.py`` resolved relative to this script's directory.
    """
    # Function-scope import so the block carries its own dependency.
    from contextlib import closing

    readme_url = 'https://raw.githubusercontent.com/syhw/wer_are_we/master/README.md'
    # urllib2 responses are not context managers themselves; closing()
    # guarantees the connection is released even if read() raises.
    with closing(urllib2.urlopen(readme_url)) as response:
        md = response.read()
    bs = BeautifulSoup(mistune.markdown(md))

    wer_data_file = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/wer.py"))
    file_output = "# The file was autogenerated by ../scrapers/wer.py\n\nfrom datetime import date\n\nfrom data.acoustics import speech_recognition, swb_hub_500\nfrom scales import *\n\n"
    wer_metrics = []

    # zip() pairs the n-th table with the n-th <h3>; surplus tables or
    # headings on either side are silently dropped.
    for table, header in zip(bs.findAll('table'), bs.findAll('h3')):
        header = header.getText()
        rows = table.findAll('tr')
        # The last three header cells are excluded from the metric columns.
        # NOTE(review): presumably these are non-metric columns -- confirm
        # against the README table layout.
        # get_metrics' result is used as a pair: index 0 holds the metric
        # names, index 1 the output text accumulated so far.
        metric_data = get_metrics(header, rows[0].findAll('th')[:-3], file_output)
        metric_names = metric_data[0]
        wer_metrics += metric_names

        table_data = []
        for row in rows:
            # Rows without <td> cells (e.g. the header row) carry no data.
            if not row.findAll('td'):
                continue
            measure_data, targets, target_source = add_measures(metric_names, row)
            if not targets:
                table_data += measure_data
            elif not measure_data:
                # A row yielding only targets: rebuild the metric
                # definitions with the target information attached.
                metric_data = get_metrics(
                    header,
                    rows[0].findAll('th')[:-3],
                    file_output,
                    targets=targets,
                    target_source=target_source,
                )
            # Rows yielding both targets and measures are ignored, exactly
            # as in the original control flow.

        # Emit the (possibly target-augmented) metric definitions first,
        # then this table's measures in sorted order.
        file_output = metric_data[1]
        file_output += "".join(sorted(table_data))

    file_output = file_output + "\n\nwer_metrics=[" + ", ".join(wer_metrics) + "]"
    with open(wer_data_file, 'wb') as f:
        f.write(file_output)
评论列表
文章目录