def unicode_iter(val):
    """Provides an iterator over the *code points* of the given Unicode sequence.

    Notes:
        Before PEP-393, Python could be built to store Unicode as either UTF-16 or UTF-32,
        which is reflected in ``sys.maxunicode``.  As a result, naive iteration of Unicode
        sequences may render non-character code points such as UTF-16 surrogates.

    Args:
        val (unicode): The unicode sequence to iterate over as integer code points in the range
            ``0x0`` to ``0x10FFFF``.

    Yields:
        The successive values produced by ``_next_code_point`` for ``val`` (with ``ord`` passed
        as its ``to_int`` argument), in order.

    Raises:
        ValueError: If ``_next_code_point`` produces ``None``, which is reported here as an
            unpaired high surrogate at the end of the sequence.
    """
    # A single shared iterator, so every helper call resumes where the previous one stopped.
    val_iter = iter(val)
    while True:
        try:
            code_point = next(_next_code_point(val, val_iter, to_int=ord))
        except StopIteration:
            # The helper finished without yielding, so there is nothing left to produce.
            # End this generator explicitly: under PEP 479 a StopIteration that escapes a
            # generator body is converted into RuntimeError rather than ending iteration.
            return
        if code_point is None:
            raise ValueError('Unpaired high surrogate at end of Unicode sequence: %r' % val)
        yield code_point
评论列表
文章目录