def pinyinify(string):
    """Turn ``string`` into a lowercase, hyphen-separated pinyin slug.

    Processing steps:

    1. Every character whose Unicode general category starts with ``P``,
       ``S``, ``Z`` or ``C`` (punctuation, symbols, separators and the
       "other" group) is replaced by ``'-'``.
    2. Every remaining character whose Unicode name starts with ``CJK`` is
       replaced by ``pinyin.get(char, format='strip')`` followed by ``'-'``.
    3. Runs of consecutive hyphens are collapsed into a single hyphen.
    4. The result is passed through
       ``pinyin.get(..., delimiter='', format='strip')`` once more and
       lowercased.

    This function does not trim leading or trailing hyphens.

    Args:
        string: The text to convert.

    Returns:
        The lowercased slug as a ``str``.
    """
    # Only characters that occur in the input can ever be translated, so the
    # table is built from those instead of scanning the whole Unicode range
    # (over a million category lookups) on every call.
    separators = {
        ord(char): '-'
        for char in set(string)
        if unicodedata.category(char)[0] in 'PSZC'
    }
    string = string.translate(separators)

    # Single pass over the text instead of one full-string ``replace`` per
    # CJK occurrence; the output is the same once hyphen runs are collapsed.
    pieces = []
    for char in string:
        # The '' default keeps an unnamed character from raising ValueError.
        if unicodedata.name(char, '').startswith('CJK'):
            pieces.append(pinyin.get(char, format='strip') + '-')
        else:
            pieces.append(char)

    # Raw string: the previous '\-+' literal relied on an invalid escape.
    string = re.sub(r'-+', '-', ''.join(pieces))
    return pinyin.get(string, delimiter='', format='strip').lower()
评论列表
文章目录