def _spell_correct(text, checker):
    """Replace each misspelled token of *text* with the checker's top suggestion.

    *checker* is any object exposing ``check(word)`` and ``suggest(word)``.
    Tokens are produced by splitting on single spaces; newlines are kept as
    standalone tokens, so the result has a space on each side of every
    newline. Tokens with no suggestions are left unchanged.
    """
    # Pad newlines with spaces so they survive the split as their own tokens.
    tokens = text.replace('\n', ' \n ').split(" ")
    corrected = []
    for token in tokens:
        # Empty tokens (from consecutive spaces) and newline markers are
        # passed through and never handed to the checker.
        if token and token != '\n' and not checker.check(token):
            # Ask for suggestions once; the lookup is the costly step.
            suggestions = checker.suggest(token)
            if suggestions:
                token = suggestions[0]
        corrected.append(token)
    return " ".join(corrected)


def tesseract_ocr_helper(base_image, config="Default"):
    """Run OCR on *base_image* and return the spell-corrected text.

    The first tool returned by ``pyocr.get_available_tools()`` is used with
    the first language that tool reports. The recognised text is then
    spell-checked against the ``en_US`` dictionary extended with the personal
    word list ``dict/urban_dict.txt`` located next to this file.

    Args:
        base_image: Image passed straight to the tool's ``image_to_string``.
        config: When not ``"Default"``, used as the single entry of the
            builder's ``tesseract_configs`` list.

    Returns:
        The corrected text. Words are re-joined with single spaces and every
        newline is surrounded by a space on each side.

    Exits the process with status 1 (via ``sys.exit``) when no OCR tool or no
    OCR language is available.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)
    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    if not langs:
        # Mirror the "no tool" handling instead of failing with IndexError.
        print("No OCR language found")
        sys.exit(1)
    # NOTE(review): the first reported language is used as-is, while the
    # spell check below is always en_US — confirm this is intended.
    lang = langs[0]
    print("Will use lang '%s'" % (lang))

    custom_builder = pyocr.builders.TextBuilder()
    if config != "Default":
        custom_builder.tesseract_configs = [config]
    txt = tool.image_to_string(
        base_image,
        lang=lang,
        builder=custom_builder
    )

    # Spell correct
    dict_path = os.path.join(os.path.dirname(__file__), "dict/urban_dict.txt")
    checker = enchant.DictWithPWL("en_US", dict_path)
    return _spell_correct(txt, checker)
评论列表
文章目录