def _load_unicode_emoji_data(self):
    '''
    Loads emoji property data from emoji-data.txt

    http://unicode.org/Public/emoji/5.0/emoji-data.txt

    The file is searched for in USER_DATADIR and DATADIR using
    _find_path_and_open_function(). If it cannot be found, a message
    is written to stderr and nothing is loaded.

    Each data line is expected to look like

        codepoint(s) ; property # version [count] ...

    where "codepoint(s)" is either a single hexadecimal code point or
    an inclusive range "first..last". For every code point in the
    range, the property is passed to self._add_to_emoji_dict() under
    the key 'properties'. If the trailing comment contains a version
    number of the form "N.N" followed by "[count]", that version is
    passed under the key 'uversion' as well.

    Blank lines and comment-only lines are ignored. Lines which cannot
    be parsed are reported on stderr and skipped, so a single bad line
    does not abort loading the rest of the file.
    '''
    dirnames = (USER_DATADIR, DATADIR)
    basenames = ('emoji-data.txt',)
    (path, open_function) = _find_path_and_open_function(
        dirnames, basenames)
    if not path:
        sys.stderr.write(
            '_load_unicode_emoji_data(): could not find "%s" in "%s"\n'
            %(basenames, dirnames))
        return
    # Compiled once here instead of once per line. The version is
    # taken from the comment part, so it has to be matched against the
    # raw line before the comment is stripped off.
    version_pattern = re.compile(
        r'[^;]*;[^;]*#\s*(?P<uversion>[0-9]+\.[0-9]+)\s*'
        + r'\[[0-9]+\]')
    # Decode explicitly as UTF-8 rather than depending on the locale.
    # NOTE(review): assumes open_function accepts encoding= in text
    # mode like open()/gzip.open() do; confirm against
    # _find_path_and_open_function().
    with open_function(
            path, mode='rt', encoding='utf-8') as unicode_emoji_data_file:
        # Iterate the file lazily; there is no need to hold every
        # line in memory at once.
        for line in unicode_emoji_data_file:
            match = version_pattern.match(line)
            unicode_version = match.group('uversion') if match else ''
            # Everything from the first '#' on is a comment.
            data = line.partition('#')[0].strip()
            if not data:
                continue
            fields = [x.strip() for x in data.split(';')[:2]]
            try:
                codepoint_string, emoji_property = fields
                bounds = [
                    int(x, 16) for x in codepoint_string.split('..')]
            except ValueError:
                # Missing ';' separator or a non-hexadecimal code point.
                bounds = []
            # A single code point is a range containing only itself.
            if len(bounds) == 1:
                bounds.append(bounds[0])
            if (len(bounds) != 2
                    or not 0 <= bounds[0] <= bounds[1] <= sys.maxunicode):
                sys.stderr.write(
                    '_load_unicode_emoji_data(): '
                    'skipping malformed line %r in "%s"\n'
                    %(line, path))
                continue
            first, last = bounds
            for codepoint in range(first, last + 1):
                emoji_key = (chr(codepoint), 'en')
                # A fresh list per call: the callee may keep it.
                self._add_to_emoji_dict(
                    emoji_key, 'properties', [emoji_property])
                if unicode_version:
                    self._add_to_emoji_dict(
                        emoji_key, 'uversion', unicode_version)
评论列表
文章目录