def _is_safe_url(url, host):
# Chrome considers any URL with more than two slashes to be absolute, but
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
if url.startswith('///'):
return False
url_info = urlparse(url)
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
# In that URL, example.com is not the hostname but, a path component. However,
# Chrome will still consider example.com to be the hostname, so we must not
# allow this syntax.
if not url_info.netloc and url_info.scheme:
return False
# Forbid URLs that start with control characters. Some browsers (like
# Chrome) ignore quite a few control characters at the start of a
# URL and might consider the URL as scheme relative.
if unicodedata.category(url[0])[0] == 'C':
return False
return ((not url_info.netloc or url_info.netloc == host) and
(not url_info.scheme or url_info.scheme in ['http', 'https']))
评论列表
文章目录