def unicode_to_ascii_authority(authority):
    """
    Convert a unicode URL authority into its ASCII (IDNA) equivalent.

    Follows the steps in RFC 3490, Section 4. For example,
    u'www.Alliancefran\xe7aise.nu' will be converted into
    'www.xn--alliancefranaise-npb.nu'.

    Empty labels (e.g. produced by a leading, trailing or doubled
    separator) are passed through unchanged so that the shape of the URL
    is not modified.

    Args:
        authority: unicode string, the URL authority component to convert,
            e.g. u'www.Alliancefran\xe7aise.nu'

    Returns:
        string: the US-ASCII character equivalent to the inputted
            authority, e.g. 'www.xn--alliancefranaise-npb.nu'

    Raises:
        Exception: if the function is not able to convert the inputted
            authority (encodings.idna.ToASCII raises UnicodeError for
            labels it cannot encode; the error is propagated as-is)

    @author: Jonathan Benn
    """
    import encodings.idna

    # RFC 3490, Section 4, Step 1
    # The encodings.idna Python module assumes that AllowUnassigned == True

    # RFC 3490, Section 4, Step 2
    # Split the authority into labels on the separator characters matched
    # by the module-level label_split_regex.
    labels = label_split_regex.split(authority)

    # RFC 3490, Section 4, Step 3
    # The encodings.idna Python module assumes that UseSTD3ASCIIRules == False

    # RFC 3490, Section 4, Step 4
    # We use the ToASCII operation because we are about to put the authority
    # into an IDN-unaware slot.
    # encodings.idna.ToASCII does not accept an empty string, but it is
    # necessary for us to allow for empty labels so that we don't modify
    # the URL, hence the conditional.
    ascii_labels = [
        to_native(encodings.idna.ToASCII(label)) if label else ''
        for label in labels
    ]

    # RFC 3490, Section 4, Step 5
    # Re-join with U+002E (full stop). str.join replaces the former
    # reduce/unichr concatenation: same result for ASCII labels, linear
    # time, and no reliance on names that are not Python 3 builtins.
    return str('.'.join(ascii_labels))
评论列表
文章目录