spelling.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:calm 作者: cygwin 项目源码 文件源码
def spellcheck_hints(args, packages):
    spelldict = DictWithPWL('en-US')
    chkr = SpellChecker(spelldict, filters=[DescFilter])
    misspellings = {}

    # add technical words not in spell-checking dictionary
    wordlist = []
    with open('words.txt') as f:
        for w in f:
            # strip any trailing comment
            w = re.sub(r'#.*$', '', w)
            # strip any whitespace
            w = w.strip()
            spelldict.add(w)
            wordlist.append(w.lower())
            # XXX: for the moment, to reduce the set of errors, ignore the fact
            # that words.txt gives a canonical capitalization, and accept any
            # capitalization
            spelldict.add(w.lower())
            spelldict.add(w.capitalize())

    # add all package names as valid words
    for p in packages:
        for w in re.split('[_-]', p):
            # remove punctuation characters
            w = re.sub(r'[+]', '', w)
            # strip off any trailing numbers
            w = re.sub(r'[\d.]*$', '', w)

            # both with and without any lib prefix
            for w1 in [w, re.sub(r'^lib', '', w)]:
                # add the package name unless it exists in the list above, which
                # will give a canonical capitalization
                if w.lower() not in wordlist:
                    spelldict.add(w.lower())
                    spelldict.add(w)
                    spelldict.add(w.capitalize())

    # for each package
    for p in sorted(packages.keys()):
        # debuginfo packages have uninteresting, auto-generated text which
        # contains the package name
        if p.endswith('-debuginfo'):
            continue

        # spell-check the spell-checkable keys
        for k in ['sdesc', 'ldesc', 'message']:
            if k in packages[p].hints:
                chkr.set_text(packages[p].hints[k])
                # XXX: this is doing all the work to generate suggestions, which
                # we then ignore, so could be written much more efficiently
                for err in chkr:
                    # logging.error("package '%s', hint '%s': Is '%s' a word?" % (p, k, err.word))
                    misspellings.setdefault(err.word, 0)
                    misspellings[err.word] += 1

    # summarize
    for c in sorted(misspellings, key=misspellings.get, reverse=True):
        print('%16s: %4d' % (c, misspellings[c]))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号