itb_emoji.py 文件源码-python代码片段

def _load_unicode_emoji_zwj_sequences(self):
        '''
        Loads emoji property data from emoji-zwj-sequences.txt

        http://unicode.org/Public/emoji/5.0/emoji-zwj-sequences.txt
        '''
        dirnames = (USER_DATADIR, DATADIR)
        basenames = ('emoji-zwj-sequences.txt',)
        (path, open_function) = _find_path_and_open_function(
            dirnames, basenames)
        if not path:
            sys.stderr.write(
                '_load_unicode_emoji_zwj_sequences(): could not find "%s" in "%s"\n'
                %(basenames, dirnames))
            return
        with open_function(path, mode='rt') as unicode_emoji_zwj_sequences_file:
            for line in unicode_emoji_zwj_sequences_file.readlines():
                unicode_version = ''
                pattern = re.compile(
                    r'[^;]*;[^;]*;[^;]*#\s*(?P<uversion>[0-9]+\.[0-9]+)\s*'
                    + r'\[[0-9]+\]')
                match  = pattern.match(line)
                if match and match.group('uversion'):
                    unicode_version = match.group('uversion')
                line = re.sub(r'#.*$', '', line).strip()
                if not line:
                    continue
                codepoints, property, name = [
                    x.strip() for x in line.split(';')[:3]]
                emoji_string = ''
                for codepoint in codepoints.split(' '):
                    emoji_string += chr(int(codepoint, 16))
                if emoji_string:
                    self._add_to_emoji_dict(
                        (emoji_string, 'en'), 'properties', [property])
                    self._add_to_emoji_dict(
                        (emoji_string, 'en'), 'names', [name.lower()])
                    if unicode_version:
                        self._add_to_emoji_dict(
                            (emoji_string, 'en'), 'uversion', unicode_version)