itb_emoji.py 文件源码-python代码片段

def _load_unicode_data(self):
        '''Loads emoji names from UnicodeData.txt'''
        dirnames = (USER_DATADIR, DATADIR,
                    # On Fedora, the “unicode-ucd” package has the
                    # UnicodeData.txt file here:
                    '/usr/share/unicode/ucd')
        basenames = ('UnicodeData.txt',)
        (path, open_function) = _find_path_and_open_function(
            dirnames, basenames)
        if not path:
            sys.stderr.write(
                '_load_unicode_data(): could not find "%s" in "%s"\n'
                %(basenames, dirnames))
            return
        with open_function(path, mode='rt') as unicode_data_file:
            for line in unicode_data_file.readlines():
                if not line.strip():
                    continue
                codepoint_string, name, category = line.split(';')[:3]
                codepoint_integer = int(codepoint_string, 16)
                emoji_string = chr(codepoint_integer)
                if category in ('Cc', 'Co', 'Cs'):
                    # Never load control characters (“Cc”), they cause
                    # too much problems when trying to display
                    # them. Never load the “First” and “Last”
                    # characters of private use characters “Co” and
                    # surrogates (“Cs”) either as these are completely
                    # useless.
                    continue
                if (not self._unicode_data_all
                        and not UNICODE_CATEGORIES[category]['valid']
                        and emoji_string not in VALID_CHARACTERS):
                    continue
                self._add_to_emoji_dict(
                    (emoji_string, 'en'), 'names', [name.lower()])
                self._add_to_emoji_dict(
                    (emoji_string, 'en'),
                    'ucategories', [
                        category,
                        UNICODE_CATEGORIES[category]['major'],
                        UNICODE_CATEGORIES[category]['minor'],
                    ]
                )