def urlify(s, maxlen=80, keep_underscores=False):
    """
    Converts incoming string to a simplified ASCII subset.

    The input is lowercased, NFKD-normalized and converted to ASCII, named
    HTML entities (``&word;``) are removed, runs of whitespace become a
    single hyphen, and every remaining character outside the allowed set
    is dropped.

    if (keep_underscores): underscores are retained in the string
    else: underscores are translated to hyphens (default)

    In both modes a run of two or more consecutive hyphens/underscores is
    collapsed into a single hyphen.

    Args:
        s: value to convert; it is passed through ``to_unicode`` first.
        maxlen: the result is sliced to this many characters.
        keep_underscores: see above.

    Returns:
        The simplified string, truncated to ``maxlen``, with no leading
        or trailing hyphen.
    """
    s = to_unicode(s)                     # to unicode
    s = s.lower()                         # to lowercase
    # NFKD splits accented characters into base character + combining mark
    s = unicodedata.normalize('NFKD', s)
    # encode as ASCII; with errors='ignore' this presumably drops whatever
    # cannot be represented (e.g. the combining marks) -- helper not shown here
    s = to_native(s, charset='ascii', errors='ignore')
    # Raw strings are used for every pattern so that \w, \s and \- reach the
    # regex engine without triggering invalid-escape warnings.
    s = re.sub(r'&\w+?;', '', s)          # strip html entities
    if keep_underscores:
        s = re.sub(r'\s+', '-', s)        # whitespace to hyphens
        # strip all but alphanumeric/underscore/hyphen
        s = re.sub(r'[^\w\-]', '', s)
    else:
        s = re.sub(r'[\s_]+', '-', s)     # whitespace & underscores to hyphens
        s = re.sub(r'[^a-z0-9\-]', '', s)  # strip all but alphanumeric/hyphen
    # collapse runs of 2+ hyphens/underscores into one hyphen
    s = re.sub(r'[-_][-_]+', '-', s)
    s = s.strip('-')                      # remove leading and trailing hyphens
    # Enforce maximum length, then strip again: the cut can land right after
    # a separator and would otherwise leave a dangling trailing hyphen.
    return s[:maxlen].rstrip('-')
评论列表
文章目录