# ockre.py source file - Python code snippet

def show_edit_distance(self, num):
        """Score ``num`` generated samples and print edit-distance statistics.

        Batches are pulled from ``self.text_img_gen`` and decoded with
        ``decode_batch(self.test_func, ...)`` until ``num`` samples have been
        processed. Two kinds of figures are accumulated:

        * edit distance (raw and length-normalized) between each decoded
          string and its ``source_str`` entry, computed on the unmodified
          strings;
        * exact-match accuracy, computed after whitespace, ``+`` and ``/``
          are removed from both strings and the decoded one is passed through
          ``deaccent``.

        Args:
            num: number of samples to evaluate. A value <= 0 evaluates
                nothing and yields zeros instead of dividing by zero.

        Returns:
            A ``(mean_norm_ed, absacc)`` tuple: the mean normalized edit
            distance and the fraction of exact matches.
        """
        # One pass that drops whitespace plus '+' and '/'; equivalent to the
        # two nested substitutions it replaces, compiled once for all samples.
        strip_chars = re.compile(r"[\s+/]")

        num_left = num
        mean_norm_ed = 0.0
        mean_ed = 0.0
        wrong = 0
        right = 0
        while num_left > 0:
            word_batch = next(self.text_img_gen)[0]
            # The last batch may hold more samples than are still needed.
            num_proc = min(word_batch['the_input'].shape[0], num_left)
            decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc], word_batch['labeltype_input'][0:num_proc])
            for j in range(num_proc):
                decoded = decoded_res[j]
                source = word_batch['source_str'][j]

                # Accuracy is judged on the cleaned-up strings.
                # NOTE(review): unicode(..., 'utf-8') is the Python 2 builtin;
                # this presumes decoded strings are UTF-8 byte strings.
                ocr_result = deaccent(unicode(strip_chars.sub("", decoded), 'utf-8'))
                gold_label = strip_chars.sub("", source)
                if gold_label == ocr_result:
                    right += 1
                else:
                    wrong += 1

                # Edit distance is measured on the raw, uncleaned strings.
                edit_dist = float(editdistance.eval(decoded, source))
                mean_ed += edit_dist
                # An empty label would otherwise divide by zero; treat its
                # length as 1 so the raw distance is used unchanged.
                mean_norm_ed += edit_dist / (len(source) or 1)
            num_left -= num_proc

        # Equals num whenever num > 0; guards the divisions when nothing ran.
        processed = right + wrong
        if processed:
            absacc = float(right) / processed
            mean_norm_ed = mean_norm_ed / processed
            mean_ed = mean_ed / processed
        else:
            absacc = 0.0
        outline = ' Out of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f\n Absolute accuracy over labels is %0.2f\n' % (
            num, mean_ed, mean_norm_ed, absacc)
        print(outline)

        return mean_norm_ed, absacc