python类urlparse()的实例源码-第2页-面圈网

sanitizer.py 文件源码项目：chalktalk_docs 作者: loremIpsum1771 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                uri = urlparse.urlparse(val_unescaped)
                if uri:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        if m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：sslstrip-hsts-openwrt 作者: adde88 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：Flask-NvRay-Blog 作者: rui7157 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：Flask-NvRay-Blog 作者: rui7157 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：My-Web-Server-Framework-With-Python2.7 作者: syjsu 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：WebAct 作者: CreatCodeBuild 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

general_name.py 文件源码项目：OneClickDTU 作者: satwikkansal 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def __init__(self, value):
        if not isinstance(value, six.text_type):
            raise TypeError("value must be a unicode string")

        parsed = urllib_parse.urlparse(value)
        if not parsed.hostname:
            netloc = ""
        elif parsed.port:
            netloc = (
                idna.encode(parsed.hostname) +
                ":{0}".format(parsed.port).encode("ascii")
            ).decode("ascii")
        else:
            netloc = idna.encode(parsed.hostname).decode("ascii")

        # Note that building a URL in this fashion means it should be
        # semantically indistinguishable from the original but is not
        # guaranteed to be exactly the same.
        uri = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        )).encode("ascii")

        self._value = value
        self._encoded = uri

sanitizer.py 文件源码项目：python-on 作者: hemangsk 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：python-on 作者: hemangsk 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：python-on 作者: hemangsk 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：remoteControlPPT 作者: htwenning 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

locked.py 文件源码项目：cloud-custodian 作者: capitalone 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def __call__(self, r):
        url = urlparse(r.url)
        path = url.path or '/'
        qs = url.query and '?%s' % url.query or ''
        safe_url = url.scheme + '://' + url.netloc.split(':')[0] + path + qs
        request = AWSRequest(
            method=r.method.upper(), url=safe_url, data=r.body)
        SigV4Auth(
            self.credentials, self.service, self.region).add_auth(request)
        r.headers.update(dict(request.headers.items()))
        return r

general_name.py 文件源码项目：xxNet 作者: drzorm 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def __init__(self, value):
        if not isinstance(value, six.text_type):
            raise TypeError("value must be a unicode string")

        parsed = urllib_parse.urlparse(value)
        if not parsed.hostname:
            netloc = ""
        elif parsed.port:
            netloc = (
                idna.encode(parsed.hostname) +
                ":{0}".format(parsed.port).encode("ascii")
            ).decode("ascii")
        else:
            netloc = idna.encode(parsed.hostname).decode("ascii")

        # Note that building a URL in this fashion means it should be
        # semantically indistinguishable from the original but is not
        # guaranteed to be exactly the same.
        uri = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        )).encode("ascii")

        self._value = value
        self._encoded = uri

general_name.py 文件源码项目：xxNet 作者: drzorm 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def __init__(self, value):
        if not isinstance(value, six.text_type):
            raise TypeError("value must be a unicode string")

        parsed = urllib_parse.urlparse(value)
        if not parsed.hostname:
            netloc = ""
        elif parsed.port:
            netloc = (
                idna.encode(parsed.hostname) +
                ":{0}".format(parsed.port).encode("ascii")
            ).decode("ascii")
        else:
            netloc = idna.encode(parsed.hostname).decode("ascii")

        # Note that building a URL in this fashion means it should be
        # semantically indistinguishable from the original but is not
        # guaranteed to be exactly the same.
        uri = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        )).encode("ascii")

        self._value = value
        self._encoded = uri

registry.py 文件源码项目：registry 作者: boundlessgeo 项目源码文件源码阅读 76 收藏 0 点赞 0 评论 0

def parse_url(url):
    parsed_url = urlparse(url)
    catalog_slug = parsed_url.path.split('/')[2]

    return catalog_slug

registry.py 文件源码项目：registry 作者: boundlessgeo 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def __init__(self, *args, **kwargs):
        self.catalog = None
        if args and hasattr(args[0], 'url'):
            url = args[0].url
            self.catalog = parse_url(url) if urlparse(url).path != '/csw' else None
        try:
            self.es, self.version = es_connect(url=REGISTRY_SEARCH_URL)
            self.es_status = 200
        except requests.exceptions.ConnectionError:
            self.es_status = 404

        database = PYCSW['repository']['database']

        return super(RegistryRepository, self).__init__(database, context=config.StaticContext())

registry.py 文件源码项目：registry 作者: boundlessgeo 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def check_netloc(layer):
    netloc = urlparse(layer.source).netloc
    if netloc in netlocs_dic.keys():
        netlocs_dic[netloc]['counter'] += 1
    else:
        netlocs_dic[netloc] = {
            'counter': 1
        }

    return netloc

sanitizer.py 文件源码项目：ASE-Fall2016 作者: Dai0526 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：vmmenu 作者: piggyking 项目源码文件源码阅读 53 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

sanitizer.py 文件源码项目：vmmenu 作者: piggyking 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token