# memeocr.py -- Python source code snippet

def _spell_correct(text, dictionary):
    """ Replace every word the dictionary rejects with its top suggestion.

    ``dictionary`` must provide ``check(word)`` and ``suggest(word)``.
    Words are delimited by single spaces; newlines are kept as standalone
    tokens, so they come back surrounded by spaces in the result.
    """
    # Pad newlines with spaces so that they survive the split as their own
    # tokens instead of staying glued to the neighbouring words.
    tokens = text.replace('\n', ' \n ').split(" ")
    corrected = []

    for token in tokens:
        # Empty tokens (from consecutive spaces) and newline markers are
        # passed through untouched and never handed to the spell checker.
        if token and token != '\n' and not dictionary.check(token):
            # Ask for suggestions only once per misspelled token.
            suggestions = dictionary.suggest(token)
            if suggestions:
                token = suggestions[0]
        corrected.append(token)

    return " ".join(corrected)


def tesseract_ocr_helper(base_image, config="Default"):
    """ A wrapper for using tesseract to do OCR

    Runs the first OCR tool reported by pyocr on ``base_image`` and then
    spell-corrects the recognised text against an ``en_US`` enchant
    dictionary extended with the word list in ``dict/urban_dict.txt``
    (resolved relative to this file).

    Args:
        base_image: The image handed to the OCR tool's ``image_to_string``.
        config: Name of a tesseract config to pass to the builder. The
            value ``"Default"`` leaves the builder configuration untouched.

    Returns:
        The recognised text as a single string with misspelled words
        replaced by the dictionary's first suggestion. Newlines from the
        OCR output are preserved, surrounded by single spaces.

    Exits the process with status 1 when no OCR tool or no OCR language
    is available.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)

    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    if not langs:
        # Without this guard the language selection below would raise an
        # IndexError; fail the same way as for a missing tool instead.
        print("No OCR language found")
        sys.exit(1)

    # The spell-correction step below is English-only, so prefer English
    # recognition when it is installed; otherwise fall back to the first
    # language the tool reports.
    lang = "eng" if "eng" in langs else langs[0]
    print("Will use lang '%s'" % (lang))

    custom_builder = pyocr.builders.TextBuilder()
    if config != "Default":
        custom_builder.tesseract_configs = [config]

    txt = tool.image_to_string(
        base_image,
        lang=lang,
        builder=custom_builder
    )

    # Spell correct
    dict_path = os.path.join(os.path.dirname(__file__), "dict/urban_dict.txt")
    d = enchant.DictWithPWL("en_US", dict_path)
    return _spell_correct(txt, d)