# hunspell_suggest.py source code - Python code snippet

def load_dictionary(self):
        '''Load the hunspell word list for ``self.name`` and create a
        spell checking backend object for it.

        Stores the tuple returned by
        ``itb_util.get_hunspell_dictionary_wordlist(self.name)`` in
        ``self.dic_path``, ``self.encoding`` and ``self.words``.

        If the word list is not empty:

        - ``self.word_pairs`` is built as ``(word, word without accents)``
          tuples when the language part of ``self.name`` (the text before
          the first ``_``) is in the list of accent languages below.
        - ``self.max_word_len`` is raised to the length of the longest
          word if that is bigger than its current value.
        - ``self.enchant_dict`` is created when enchant was imported
          successfully; otherwise ``self.pyhunspell_object`` is created
          when pyhunspell was imported successfully and a dictionary
          path is known.

        '''
        if DEBUG_LEVEL > 0:
            sys.stderr.write("load_dictionary() ...\n")
        (self.dic_path,
         self.encoding,
         self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
        if not self.words:
            # Nothing was loaded, there is nothing to prepare.
            return
        # List of languages where accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
            'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
            'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
            'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
            'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
            'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
            'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
            've', 'vi', 'wa', 'xh',
        )
        if self.name.split('_')[0] in accent_languages:
            self.word_pairs = [
                (word, itb_util.remove_accents(word))
                for word in self.words
            ]
        # Only ever grow max_word_len, never shrink it.
        longest = max(len(word) for word in self.words)
        if longest > self.max_word_len:
            self.max_word_len = longest
        if DEBUG_LEVEL > 1:
            sys.stderr.write(
                'load_dictionary() max_word_len = %s\n'
                % self.max_word_len)
        if IMPORT_ENCHANT_SUCCESSFUL:
            self.enchant_dict = enchant.Dict(self.name)
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            # Swap only the trailing '.dic' extension for '.aff'.
            # A plain str.replace() would also rewrite a '.dic' that
            # happens to occur earlier in the path (for example in a
            # directory name) and produce a wrong affix file path.
            dic_suffix = '.dic'
            if self.dic_path.endswith(dic_suffix):
                aff_path = self.dic_path[:-len(dic_suffix)] + '.aff'
            else:
                aff_path = self.dic_path
            self.pyhunspell_object = hunspell.HunSpell(
                self.dic_path, aff_path)