memeocr.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:meme_get 作者: memegen 项目源码 文件源码
def tesseract_ocr_helper(base_image, config="Default"):
    """Run OCR on an image through pyocr and spell-correct the result.

    The first tool returned by ``pyocr.get_available_tools()`` is used.
    English (``"eng"``) is selected as the recognition language when the
    tool reports it, because the spell-correction step below always uses
    an ``en_US`` dictionary; otherwise the first reported language is used.

    Progress messages (tool, available languages, chosen language) are
    printed to stdout.

    Args:
        base_image: Image handed unchanged to the tool's
            ``image_to_string`` (presumably a PIL image -- confirm
            against callers).
        config: Name of a tesseract config to apply via the builder's
            ``tesseract_configs``. The sentinel ``"Default"`` leaves the
            builder's configuration untouched.

    Returns:
        The recognized text as one string. Tokens are joined with single
        spaces and each line break is kept as its own token, so newlines
        in the output are surrounded by spaces. Every non-empty token
        that the dictionary rejects is replaced by the dictionary's first
        suggestion, when it offers one.

    Raises:
        SystemExit: If pyocr finds no OCR tool (exit status 1).
        IndexError: If the selected tool reports no languages at all.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)

    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    # The spell checker below is English-only, so recognise English text
    # whenever that language is installed rather than relying on whichever
    # language happens to be listed first.
    lang = "eng" if "eng" in langs else langs[0]
    print("Will use lang '%s'" % (lang))

    custom_builder = pyocr.builders.TextBuilder()
    if config != "Default":
        custom_builder.tesseract_configs = [config]

    txt = tool.image_to_string(
        base_image,
        lang=lang,
        builder=custom_builder
    )

    # Spell correct against en_US plus the bundled personal word list.
    dict_path = os.path.join(os.path.dirname(__file__),"dict/urban_dict.txt")
    checker = enchant.DictWithPWL("en_US", dict_path)

    # Pad newlines with spaces so they survive the split as separate tokens
    # and are never passed to the spell checker.
    tokens = txt.replace('\n', ' \n ').split(" ")
    corrected = []

    for token in tokens:
        if token and token != '\n' and not checker.check(token):
            # Ask for suggestions only once per token; the lookup is the
            # costly part of spell checking.
            suggestions = checker.suggest(token)
            if suggestions:
                token = suggestions[0]
        corrected.append(token)

    return " ".join(corrected)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号