def _load_unicode_data(self):
    '''Loads emoji names and Unicode categories from UnicodeData.txt

    The file “UnicodeData.txt” is searched for by passing the
    directories USER_DATADIR, DATADIR and “/usr/share/unicode/ucd”
    to _find_path_and_open_function(). If no path is returned, a
    message is written to stderr and nothing is loaded.

    Each non-blank line is split at “;” and the first three fields
    are used as code point (hexadecimal), name and general category.
    For every character which is not skipped, two entries are added
    with self._add_to_emoji_dict() under the key (character, 'en'):

        'names':       [lower-cased name]
        'ucategories': [category, major category, minor category]

    A non-blank line with fewer than three fields or with a code
    point which is not valid hexadecimal raises ValueError. A
    category which is missing from UNICODE_CATEGORIES makes the
    lookup in that table fail.
    '''
    dirnames = (USER_DATADIR, DATADIR,
                # On Fedora, the “unicode-ucd” package has the
                # UnicodeData.txt file here:
                '/usr/share/unicode/ucd')
    basenames = ('UnicodeData.txt',)
    (path, open_function) = _find_path_and_open_function(
        dirnames, basenames)
    if not path:
        sys.stderr.write(
            '_load_unicode_data(): could not find "%s" in "%s"\n'
            %(basenames, dirnames))
        return
    with open_function(path, mode='rt') as unicode_data_file:
        # Iterate over the file object itself instead of calling
        # readlines(): the lines are the same but the whole file
        # does not need to be held in a list first.
        for line in unicode_data_file:
            if not line.strip():
                continue
            codepoint_string, name, category = line.split(';')[:3]
            codepoint_integer = int(codepoint_string, 16)
            emoji_string = chr(codepoint_integer)
            if category in ('Cc', 'Co', 'Cs'):
                # Never load control characters (“Cc”), they cause
                # too many problems when trying to display
                # them. Never load the “First” and “Last”
                # characters of private use characters “Co” and
                # surrogates (“Cs”) either as these are completely
                # useless.
                continue
            # Look the category up only once per line, it is needed
            # both for the filter and for the stored categories.
            category_info = UNICODE_CATEGORIES[category]
            if (not self._unicode_data_all
                    and not category_info['valid']
                    and emoji_string not in VALID_CHARACTERS):
                # Unless all of UnicodeData.txt was requested, keep
                # only characters of a category marked as valid or
                # characters listed in VALID_CHARACTERS.
                continue
            emoji_key = (emoji_string, 'en')
            self._add_to_emoji_dict(
                emoji_key, 'names', [name.lower()])
            self._add_to_emoji_dict(
                emoji_key,
                'ucategories', [
                    category,
                    category_info['major'],
                    category_info['minor'],
                ]
            )
评论列表
文章目录