python类html()的实例源码-面圈网

feedparser.py 文件源码项目：touch-pay-client 作者: HackPucBemobi 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：respeaker_virtualenv 作者: respeaker 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：Problematica-public 作者: TechMaz 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：download-manager 作者: thispc 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _l2bytes(l):
  # Convert a list of ints to bytes if the interpreter is Python 3
  try:
    if bytes is not str:
      # In Python 2.6 and above, this call won't raise an exception
      # but it will return bytes([65]) as '[65]' instead of 'A'
      return bytes(l)
    raise NameError
  except NameError:
    return ''.join(map(chr, l))

# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
#   http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
#   https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!

feedparser.py 文件源码项目：download-manager 作者: thispc 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def lookslikehtml(self, s):
        if self.version.startswith('atom'): return
        if self.contentparams.get('type','text/html') != 'text/plain': return

        # must have a close tag or a entity reference to qualify
        if not (re.search(r'</(\w+)>',s) or re.search("&#?\w+;",s)): return

        # all tags must be in a restricted subset of valid HTML tags
        if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements,
            re.findall(r'</?(\w+)',s)): return

        # all entities must have been defined as valid HTML entities
        from htmlentitydefs import entitydefs
        if filter(lambda e: e not in entitydefs.keys(),
            re.findall(r'&(\w+);',s)): return

        return 1

feedparser.py 文件源码项目：download-manager 作者: thispc 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', 'alternate')
        if attrsD['rel'] == 'self':
            attrsD.setdefault('type', 'application/atom+xml')
        else:
            attrsD.setdefault('type', 'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if attrsD.has_key('href'):
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if attrsD.has_key('href'):
            expectingText = 0
            if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：tingbot-apps 作者: WhistleMaster 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：rekall-agent-server 作者: rekall-innovations 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：slugiot-client 作者: slugiot 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：alfred-status-workflow 作者: manosim 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：StuffShare 作者: StuffShare 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def _start_link(self, attrsD):
        attrsD.setdefault('rel', u'alternate')
        if attrsD['rel'] == u'self':
            attrsD.setdefault('type', u'application/atom+xml')
        else:
            attrsD.setdefault('type', u'text/html')
        context = self._getContext()
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if 'href' in attrsD:
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrsD))
        if 'href' in attrsD:
            expectingText = 0
            if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            self.push('link', expectingText)

feedparser.py 文件源码项目：touch-pay-client 作者: HackPucBemobi 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _l2bytes(l):
        return bytes(l)

# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
#   http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
#   https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!

feedparser.py 文件源码项目：touch-pay-client 作者: HackPucBemobi 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def mapContentType(self, contentType):
        contentType = contentType.lower()
        if contentType == 'text' or contentType == 'plain':
            contentType = u'text/plain'
        elif contentType == 'html':
            contentType = u'text/html'
        elif contentType == 'xhtml':
            contentType = u'application/xhtml+xml'
        return contentType

feedparser.py 文件源码项目：touch-pay-client 作者: HackPucBemobi 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def _start_description(self, attrsD):
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)

feedparser.py 文件源码项目：touch-pay-client 作者: HackPucBemobi 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def _start_content_encoded(self, attrsD):
        self.pushContent('content', attrsD, u'text/html', 1)

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def _l2bytes(l):
        return bytes(l)

# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
#   http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
#   https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def mapContentType(self, contentType):
        contentType = contentType.lower()
        if contentType == 'text' or contentType == 'plain':
            contentType = u'text/plain'
        elif contentType == 'html':
            contentType = u'text/html'
        elif contentType == 'xhtml':
            contentType = u'application/xhtml+xml'
        return contentType

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _start_description(self, attrsD):
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _start_content_encoded(self, attrsD):
        self.pushContent('content', attrsD, u'text/html', 1)

feedparser.py 文件源码项目：true_review_web2py 作者: lucadealfaro 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def _parse_date_rfc822(dt):
    """Parse RFC 822 dates and times, with one minor
    difference: years may be 4DIGIT or 2DIGIT.
    http://tools.ietf.org/html/rfc822#section-5"""
    try:
        m = _rfc822_match(dt.lower()).groupdict(0)
    except AttributeError:
        return None

    return _parse_date_group_rfc822(m)

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _l2bytes(l):
        return bytes(l)

# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
#   http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
#   https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def mapContentType(self, contentType):
        contentType = contentType.lower()
        if contentType == 'text' or contentType == 'plain':
            contentType = u'text/plain'
        elif contentType == 'html':
            contentType = u'text/html'
        elif contentType == 'xhtml':
            contentType = u'application/xhtml+xml'
        return contentType

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def _start_description(self, attrsD):
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def _start_content_encoded(self, attrsD):
        self.pushContent('content', attrsD, u'text/html', 1)

feedparser.py 文件源码项目：spc 作者: whbrewer 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _parse_date_rfc822(dt):
    """Parse RFC 822 dates and times, with one minor
    difference: years may be 4DIGIT or 2DIGIT.
    http://tools.ietf.org/html/rfc822#section-5"""
    try:
        m = _rfc822_match(dt.lower()).groupdict(0)
    except AttributeError:
        return None

    # Calculate a date and timestamp
    for k in ('year', 'day', 'hour', 'minute', 'second'):
        m[k] = int(m[k])
    m['month'] = _rfc822_months.index(m['month']) + 1
    # If the year is 2 digits, assume everything in the 90's is the 1990's
    if m['year'] < 100:
        m['year'] += (1900, 2000)[m['year'] < 90]
    stamp = datetime.datetime(*[m[i] for i in
                ('year', 'month', 'day', 'hour', 'minute', 'second')])

    # Use the timezone information to calculate the difference between
    # the given date and timestamp and Universal Coordinated Time
    tzhour = 0
    tzmin = 0
    if m['tz'] and m['tz'].startswith('gmt'):
        # Handle GMT and GMT+hh:mm timezone syntax (the trailing
        # timezone info will be handled by the next `if` block)
        m['tz'] = ''.join(m['tz'][3:].split(':')) or 'gmt'
    if not m['tz']:
        pass
    elif m['tz'].startswith('+'):
        tzhour = int(m['tz'][1:3])
        tzmin = int(m['tz'][3:])
    elif m['tz'].startswith('-'):
        tzhour = int(m['tz'][1:3]) * -1
        tzmin = int(m['tz'][3:]) * -1
    else:
        tzhour = _rfc822_tznames[m['tz']]
    delta = datetime.timedelta(0, 0, 0, 0, tzmin, tzhour)

    # Return the date and timestamp in UTC
    return (stamp - delta).utctimetuple()

feedparser.py 文件源码项目：respeaker_virtualenv 作者: respeaker 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _l2bytes(l):
        return bytes(l)

# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
#   http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
#   https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!

feedparser.py 文件源码项目：respeaker_virtualenv 作者: respeaker 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def mapContentType(self, contentType):
        contentType = contentType.lower()
        if contentType == 'text' or contentType == 'plain':
            contentType = u'text/plain'
        elif contentType == 'html':
            contentType = u'text/html'
        elif contentType == 'xhtml':
            contentType = u'application/xhtml+xml'
        return contentType

feedparser.py 文件源码项目：respeaker_virtualenv 作者: respeaker 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _start_description(self, attrsD):
        context = self._getContext()
        if 'summary' in context:
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)