sanitizer.py 文件源码-python代码片段

def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                uri = urlparse.urlparse(val_unescaped)
                if uri:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    if uri.scheme == 'data':
                        m = content_type_rgx.match(uri.path)
                        if not m:
                            del attrs[attr]
                        if m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token