def spellcheck_hints(args, packages):
    """Spell-check package hint text and print a misspelling frequency table.

    Builds an 'en-US' dictionary extended with the words listed in
    ``words.txt`` and with words derived from the package names, runs the
    checker over the ``sdesc``, ``ldesc`` and ``message`` hints of every
    package whose name does not end in ``-debuginfo``, and prints each word
    the checker reports together with its number of occurrences, most
    frequent first.

    Args:
        args: Not referenced by this function; kept so the call signature
            stays unchanged.
        packages: Mapping of package name to an object whose ``hints``
            attribute maps hint keys to text.

    Returns:
        None. The summary is written to standard output.
    """
    spelldict = DictWithPWL('en-US')
    chkr = SpellChecker(spelldict, filters=[DescFilter])
    misspellings = {}

    # add technical words not in spell-checking dictionary
    # (kept as a set because it is only used for membership tests below)
    wordlist = set()
    with open('words.txt') as f:
        for line in f:
            # strip any trailing comment, then any surrounding whitespace
            w = re.sub(r'#.*$', '', line).strip()

            # blank and comment-only lines contribute no word
            if not w:
                continue

            spelldict.add(w)
            wordlist.add(w.lower())

            # XXX: for the moment, to reduce the set of errors, ignore the fact
            # that words.txt gives a canonical capitalization, and accept any
            # capitalization
            spelldict.add(w.lower())
            spelldict.add(w.capitalize())

    # add all package names as valid words
    for p in packages:
        for w in re.split('[_-]', p):
            # remove punctuation characters
            w = re.sub(r'[+]', '', w)
            # strip off any trailing numbers
            w = re.sub(r'[\d.]*$', '', w)

            # both with and without any lib prefix
            for w1 in (w, re.sub(r'^lib', '', w)):
                # a fragment that was only digits/punctuation (or just 'lib')
                # leaves nothing worth adding
                if not w1:
                    continue

                # add the package name unless it exists in the list above, which
                # will give a canonical capitalization
                if w1.lower() not in wordlist:
                    spelldict.add(w1.lower())
                    spelldict.add(w1)
                    spelldict.add(w1.capitalize())

    # for each package
    for p in sorted(packages.keys()):
        # debuginfo packages have uninteresting, auto-generated text which
        # contains the package name
        if p.endswith('-debuginfo'):
            continue

        hints = packages[p].hints

        # spell-check the spell-checkable keys
        for k in ['sdesc', 'ldesc', 'message']:
            if k not in hints:
                continue

            chkr.set_text(hints[k])

            # XXX: this is doing all the work to generate suggestions, which
            # we then ignore, so could be written much more efficiently
            for err in chkr:
                # logging.error("package '%s', hint '%s': Is '%s' a word?" % (p, k, err.word))
                misspellings[err.word] = misspellings.get(err.word, 0) + 1

    # summarize
    for c in sorted(misspellings, key=misspellings.get, reverse=True):
        print('%16s: %4d' % (c, misspellings[c]))
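# NOTE: stray web-page navigation text ("comment list", "article table of contents") was captured here; it is not part of the program.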