def show_edit_distance(self, num):
    """Score ``num`` generated samples and print an accuracy summary.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch(self.test_func, ...)`` until ``num`` samples have been
    scored.  Two kinds of metric are accumulated per sample:

    * exact-match accuracy, comparing the decoded text and the reference
      label after whitespace, ``+`` and ``/`` have been removed from both
      (the decoded text is additionally passed through ``deaccent``);
    * edit distance between the *unmodified* decoded text and the
      *unmodified* reference label, both raw and normalized by the
      reference length.

    Args:
        num: Number of samples to evaluate.  When ``num <= 0`` nothing is
            pulled from the generator and every metric is reported as 0.0
            instead of raising ``ZeroDivisionError``.

    Returns:
        A ``(mean_norm_ed, absacc)`` tuple: the mean normalized edit
        distance and the fraction of labels matched exactly.
    """
    # A single pre-compiled pass removes whitespace, '+' and '/'; this is
    # equivalent to the two chained substitutions it replaces.
    strip_chars = re.compile(r"[\s+/]")

    num_left = num
    mean_norm_ed = 0.0
    mean_ed = 0.0
    wrong = 0
    right = 0
    while num_left > 0:
        word_batch = next(self.text_img_gen)[0]
        # The last batch may hold more samples than are still needed.
        num_proc = min(word_batch['the_input'].shape[0], num_left)
        decoded_res = decode_batch(self.test_func,
                                   word_batch['the_input'][0:num_proc],
                                   word_batch['labeltype_input'][0:num_proc])
        for j in range(num_proc):
            source = word_batch['source_str'][j]
            # NOTE(review): `unicode` is the Python 2 builtin; this line
            # assumes decode_batch returns UTF-8 byte strings -- confirm.
            ocr_result = deaccent(unicode(strip_chars.sub("", decoded_res[j]), 'utf-8'))
            gold_label = strip_chars.sub("", source)
            if gold_label == ocr_result:
                right += 1
            else:
                wrong += 1
            edit_dist = float(editdistance.eval(decoded_res[j], source))
            mean_ed += edit_dist
            # Guard against an empty reference label, which would otherwise
            # divide by zero; the raw distance is used in that case.
            mean_norm_ed += edit_dist / max(len(source), 1)
        num_left -= num_proc

    scored = right + wrong
    absacc = float(right) / scored if scored else 0.0
    if num > 0:
        mean_norm_ed = mean_norm_ed / num
        mean_ed = mean_ed / num
    outline = ' Out of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f\n Absolute accuracy over labels is %0.2f\n' % (
        num, mean_ed, mean_norm_ed, absacc)
    print(outline)
    return mean_norm_ed, absacc
评论列表
文章目录