def url_permutations(url):
    """Yield the host-suffix/path-prefix expressions to look up for *url*.

    The URL is expected to be canonicalized already.  Scheme, userinfo and
    port are discarded; every host permutation is then combined with every
    path permutation and yielded as a ``host + path`` string, hosts in the
    outer loop and paths in the inner loop.

    Host permutations (at most five):
      * the exact hostname;
      * up to four suffixes made of the last five labels, dropping the
        leading label each time and never going below two labels.
      A dotted-quad IPv4 address is only yielded verbatim.

    Path permutations (at most six, duplicates removed):
      * the exact path including the query string;
      * the exact path without the query string;
      * up to four prefixes starting at ``/``, each ending in ``/``.

    A URL without a path is treated as having the path ``/``.  A URL with an
    empty host yields nothing.

    Raises:
        ValueError: if *url* has no ``//host`` part (for example a
            scheme-less ``example.com/path``).
    """
    def url_host_permutations(host):
        # Anchored at the end so that a hostname which merely *starts* with
        # a dotted quad (e.g. "1.2.3.4.example.com") still gets suffixes.
        if re.match(r'\d+\.\d+\.\d+\.\d+$', host):
            yield host
            return
        # The exact hostname is always tried, even for a single label.
        yield host
        labels = host.split('.')
        # Suffixes of 5, 4, 3, 2 labels; the full host was yielded above, so
        # start one label below it to avoid emitting it twice.
        for count in range(min(len(labels) - 1, 5), 1, -1):
            yield '.'.join(labels[-count:])

    def url_path_permutations(path):
        candidates = [path]
        bare_path, has_query, _ = path.partition('?')
        if has_query:
            candidates.append(bare_path)
        # Everything before the last "/" is a directory component; the
        # leading empty component produces the root "/" itself.
        prefix = ''
        for segment in bare_path.split('/')[:-1][:4]:
            prefix += segment + '/'
            candidates.append(prefix)
        # A path ending in "/" equals one of its own prefixes; emit each
        # candidate once, keeping first-seen order.
        seen = set()
        for candidate in candidates:
            if candidate not in seen:
                seen.add(candidate)
                yield candidate

    # Optional "scheme:", then "//authority", then the rest (path, query).
    match = re.match(r'(?:[^/:]+:)?//([^/?#]*)(.*)', url, re.DOTALL)
    if match is None:
        raise ValueError('URL has no "//host" part: %r' % (url,))
    host, path = match.groups()

    # Drop "user:password@" (split on the last "@") and a trailing ":port".
    host = host.rpartition('@')[2]
    port_match = re.match(r'(.*):[0-9]*$', host)
    if port_match:
        host = port_match.group(1)
    if not host:
        return

    # "http://host" and "http://host?q" carry no leading slash in the rest.
    if not path.startswith('/'):
        path = '/' + path

    # The path permutations do not depend on the host: compute them once.
    paths = list(url_path_permutations(path))
    for h in url_host_permutations(host):
        for p in paths:
            yield '%s%s' % (h, p)
评论列表
文章目录