urlutils.py 文件源码

python
阅读 34 收藏 0 点赞 0 评论 0

项目:py-google-safelist 作者: furio 项目源码 文件源码
def url_permutations(url):
        """Try all permutations of hostname and path which can be applied
        to blacklisted URLs"""
        def url_host_permutations(host):
            if re.match(r'\d+\.\d+\.\d+\.\d+', host):
                yield host
                return
            parts = host.split('.')
            l = min(len(parts),5)
            if l > 4:
                yield host
            for i in xrange(l-1):
                yield '.'.join(parts[i-l:])
        def url_path_permutations(path):
            if path != '/':
                yield path
            query = None
            if '?' in path:
                path, query =  path.split('?', 1)
            if query is not None:
                yield path
            path_parts = path.split('/')[0:-1]
            curr_path = ''
            for i in xrange(min(4, len(path_parts))):
                curr_path = curr_path + path_parts[i] + '/'
                yield curr_path
        protocol, address_str = urllib.splittype(url)
        host, path = urllib.splithost(address_str)
        user, host = urllib.splituser(host)
        host, port = urllib.splitport(host)
        host = host.strip('/')
        for h in url_host_permutations(host):
            for p in url_path_permutations(path):
                yield '%s%s' % (h, p)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号