def load_dictionary(self):
    '''Load a hunspell dictionary and instantiate an
    enchant.Dict() or a hunspell.HunSpell() object.

    The result of itb_util.get_hunspell_dictionary_wordlist(self.name)
    is unpacked into self.dic_path, self.encoding and self.words.

    If the word list is empty, nothing else is done. Otherwise:

    - If the part of self.name before the first '_' is one of the
      languages listed below, self.word_pairs is set to a list of
      (word, itb_util.remove_accents(word)) tuples.
    - self.max_word_len is raised to the length of the longest word
      in self.words (it is never lowered).
    - If IMPORT_ENCHANT_SUCCESSFUL is true, self.enchant_dict is set to
      enchant.Dict(self.name). Otherwise, if IMPORT_HUNSPELL_SUCCESSFUL
      is true and self.dic_path is not empty, self.pyhunspell_object is
      set to a hunspell.HunSpell() built from the '.dic' file and the
      '.aff' file next to it.
    '''
    if DEBUG_LEVEL > 0:
        sys.stderr.write("load_dictionary() ...\n")
    (self.dic_path,
     self.encoding,
     self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
    if not self.words:
        # No word list: leave all other attributes untouched.
        return
    # List of languages where accent insensitive matching makes sense:
    accent_languages = (
        'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
        'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
        'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
        'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
        'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
        'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
        'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
        'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
        've', 'vi', 'wa', 'xh',
    )
    # self.name looks like 'en_US'; only the part before '_' is compared.
    if self.name.split('_')[0] in accent_languages:
        self.word_pairs = [
            (word, itb_util.remove_accents(word))
            for word in self.words
        ]
    # Only ever grow max_word_len, never shrink it.
    self.max_word_len = max(
        self.max_word_len,
        max((len(word) for word in self.words), default=0))
    if DEBUG_LEVEL > 1:
        sys.stderr.write(
            'load_dictionary() max_word_len = %s\n'
            % self.max_word_len)
    if IMPORT_ENCHANT_SUCCESSFUL:
        self.enchant_dict = enchant.Dict(self.name)
    elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
        # Swap only the last '.dic' for '.aff'. A plain str.replace()
        # would also rewrite a '.dic' that appears in a directory name
        # and produce a path to a non-existent affix file.
        stem, suffix, rest = self.dic_path.rpartition('.dic')
        aff_path = stem + '.aff' + rest if suffix else self.dic_path
        self.pyhunspell_object = hunspell.HunSpell(self.dic_path, aff_path)
hunspell_suggest.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录