def _load_unicode_emoji_zwj_sequences(self):
    '''
    Loads emoji property data from emoji-zwj-sequences.txt
    http://unicode.org/Public/emoji/5.0/emoji-zwj-sequences.txt

    The file is searched for in USER_DATADIR first, then in DATADIR.
    If it cannot be found, a message is written to stderr and nothing
    is loaded.

    Every data line is expected to have the form

        code points ; property ; name # version [count] ...

    For each such line the sequence of hexadecimal code points is
    converted into a string and passed, together with the language
    'en', to self._add_to_emoji_dict() to record:

        'properties': a list containing the property field
        'names':      a list containing the lower-cased name field
        'uversion':   the version number found in the trailing
                      comment (only if one was found)

    Blank lines and comment-only lines are ignored. Lines which have
    fewer than three fields or contain an invalid code point are
    reported on stderr and skipped, so that a single bad line does not
    abort loading the rest of the file.
    '''
    dirnames = (USER_DATADIR, DATADIR)
    basenames = ('emoji-zwj-sequences.txt',)
    (path, open_function) = _find_path_and_open_function(
        dirnames, basenames)
    if not path:
        sys.stderr.write(
            '_load_unicode_emoji_zwj_sequences(): could not find "%s" in "%s"\n'
            %(basenames, dirnames))
        return
    # Loop invariant, so build it once instead of once per line.
    # Matches three ';' separated fields followed by a comment which
    # starts with a version number like "5.0" and a count like "[1]".
    version_pattern = re.compile(
        r'[^;]*;[^;]*;[^;]*#\s*(?P<uversion>[0-9]+\.[0-9]+)\s*'
        + r'\[[0-9]+\]')
    with open_function(path, mode='rt') as unicode_emoji_zwj_sequences_file:
        # Iterate the file object directly, there is no need to read
        # all lines into a list first.
        for line in unicode_emoji_zwj_sequences_file:
            # The version has to be extracted before the comment
            # is stripped because it is stored inside the comment.
            match = version_pattern.match(line)
            unicode_version = match.group('uversion') if match else ''
            # Drop everything from the first '#' on:
            data = line.partition('#')[0].strip()
            if not data:
                continue
            fields = [x.strip() for x in data.split(';')[:3]]
            if len(fields) < 3:
                sys.stderr.write(
                    '_load_unicode_emoji_zwj_sequences(): '
                    'skipping malformed line "%s"\n' %(data,))
                continue
            codepoints, emoji_property, name = fields
            try:
                # split() without argument ignores repeated spaces and
                # returns an empty list for an empty field, whereas
                # split(' ') would produce '' tokens which int() rejects.
                emoji_string = ''.join(
                    chr(int(codepoint, 16))
                    for codepoint in codepoints.split())
            except (ValueError, OverflowError):
                sys.stderr.write(
                    '_load_unicode_emoji_zwj_sequences(): '
                    'skipping line with invalid code points "%s"\n'
                    %(data,))
                continue
            if not emoji_string:
                continue
            self._add_to_emoji_dict(
                (emoji_string, 'en'), 'properties', [emoji_property])
            self._add_to_emoji_dict(
                (emoji_string, 'en'), 'names', [name.lower()])
            if unicode_version:
                self._add_to_emoji_dict(
                    (emoji_string, 'en'), 'uversion', unicode_version)
评论列表
文章目录