def script(char):
    """Look up the four-letter script code of the Unicode character 'char'.

    The code is returned as a string.

    >>> script("a")
    'Latn'
    >>> script(",")
    'Zyyy'
    >>> script(unichr(0x10FFFF))
    'Zzzz'
    """
    codepoint = byteord(char)
    # Scripts.RANGES is a sorted list that stores only the starting
    # breakpoint of each range. 'bisect_right(a, x)' yields the insertion
    # point to the right of any entries equal to x, so everything before that
    # point is <= x and everything from it onwards is > x. The range that
    # contains the codepoint is the last one whose start is <= codepoint,
    # which sits exactly one slot before the insertion point; hence the
    # subtraction of 1.
    # NOTE(review): presumably the first breakpoint is 0, so the index never
    # becomes -1 (which would wrap around to the last value) -- confirm.
    range_index = bisect_right(Scripts.RANGES, codepoint) - 1
    return Scripts.VALUES[range_index]
评论列表
文章目录