python类Comment()的实例源码

soup.py 文件源码 项目:v2ex-tornado-2 作者: coderyy 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def getNodeDetails(self, node):
    """Classify a BeautifulSoup node and return its details as a tuple.

    The first item of the returned tuple is one of the ``_base`` node-type
    constants (DOCUMENT, DOCTYPE, COMMENT, TEXT, ELEMENT or UNKNOWN); the
    remaining items depend on the node type.

    The checks run from most to least specific on purpose: the order of
    the isinstance tests is significant and must not be rearranged.
    """
    # Document or DocumentFragment
    if isinstance(node, BeautifulSoup):
        return (_base.DOCUMENT,)

    # DocumentType
    if isinstance(node, Declaration):
        text = unicode(node.string)
        # Some BeautifulSoup/Python versions add the '<!' ... '>' markup
        # during unicode conversion; strip it only when it is present.
        if text.startswith('<!') and text.endswith('>'):
            text = text[2:-1]
        # BeautifulSoup keeps the doctype as one string, so the separate
        # parts are recovered with a regexp. This is fragile: it should
        # hold for trees built by html5lib itself but may fail on trees
        # that were modified afterwards.
        match = self.doctype_regexp.match(text)
        assert match is not None, "DOCTYPE did not match expected format"

        doctype_name = match.group('name')
        public_id = match.group('publicId')
        # The system id is captured by a different group depending on
        # whether a public id was matched.
        system_group = 'systemId1' if public_id is not None else 'systemId2'
        system_id = match.group(system_group)
        return _base.DOCTYPE, doctype_name, public_id or "", system_id or ""

    if isinstance(node, Comment):
        text = unicode(node.string)
        # Drop the comment delimiters when the conversion included them.
        if text.startswith('<!--') and text.endswith('-->'):
            text = text[4:-3]
        return _base.COMMENT, text

    # TextNode
    if isinstance(node, unicode):
        return _base.TEXT, node

    # Element
    if isinstance(node, Tag):
        attributes = dict(node.attrs).items()
        return (_base.ELEMENT, namespaces["html"], node.name,
                attributes, node.contents)

    return _base.UNKNOWN, node.__class__.__name__
soup.py 文件源码 项目:v2ex-tornado-2 作者: coderyy 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def commentClass(self, data):
    """Build a Comment from *data* and return it wrapped in a TextNode.

    The TextNode is created with ``self.soup`` as its second argument.
    """
    comment = Comment(data)
    return TextNode(comment, self.soup)
autolinks.py 文件源码 项目:touch-pay-client 作者: HackPucBemobi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:true_review_web2py 作者: lucadealfaro 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:spc 作者: whbrewer 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:Problematica-public 作者: TechMaz 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:rekall-agent-server 作者: rekall-innovations 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:slugiot-client 作者: slugiot 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
autolinks.py 文件源码 项目:StuffShare 作者: StuffShare 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def expand_html(html, cdict=None):
    """Return *html* with comments removed and links in text expanded.

    The markup is parsed with BeautifulSoup, every Comment node is
    extracted, and each remaining text node has its ``regex_link``
    matches replaced by ``expand_one(matched_text, cdict)``. Text whose
    parent tag is one of the skipped tags below is left untouched.

    Raises:
        RuntimeError: if ``have_soup`` is false.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")

    # Text directly inside these tags is never rewritten.
    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object',
                       'audio', 'video')

    def is_comment(text):
        return isinstance(text, Comment)

    def expand_match(match):
        return expand_one(match.group(0), cdict)

    soup = BeautifulSoup(html)
    for comment in soup.findAll(text=is_comment):
        comment.extract()

    for txt in soup.findAll(text=True):
        if txt.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(expand_match, txt)
        # Re-parse the expanded text so any generated markup becomes nodes.
        txt.replaceWith(BeautifulSoup(expanded))
    return str(soup)
soup.py 文件源码 项目:v2ex-tornado-2 作者: coderyy 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def testSerializer(element):
    """Serialize a BeautifulSoup tree into an indented, line-based dump.

    Each node contributes one line (plus one line per attribute for
    elements); children are nested two spaces deeper than their parent.
    The lines are joined with newlines and returned as a single string.
    """
    import re
    doctype_pattern = r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
    match_doctype = re.compile(doctype_pattern).match
    lines = []

    def walk(node, depth=0):
        pad = ' ' * depth

        if isinstance(node, Declaration):
            match = match_doctype(node.string)
            assert match is not None, "DOCTYPE did not match expected format"
            doctype_name = match.group('name')
            public_id = match.group('publicId')
            if public_id is not None:
                # With a public id present, a missing system id becomes ""
                # so the long form below is always used.
                system_id = match.group('systemId1') or ""
            else:
                system_id = match.group('systemId2')

            if public_id is None and system_id is None:
                lines.append("|%s<!DOCTYPE %s>" % (pad, doctype_name))
            else:
                lines.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                             (pad, doctype_name, public_id or "", system_id or ""))

        elif isinstance(node, BeautifulSoup):
            is_fragment = node.name == "[document_fragment]"
            lines.append("#document-fragment" if is_fragment else "#document")

        elif isinstance(node, Comment):
            lines.append("|%s<!-- %s -->" % (pad, node.string))

        elif isinstance(node, unicode):
            lines.append("|%s\"%s\"" % (pad, node))

        else:
            lines.append("|%s<%s>" % (pad, node.name))
            if node.attrs:
                attr_pad = ' ' * (depth + 2)
                # Attributes are emitted in sorted order, one per line.
                for attr_name, attr_value in sorted(node.attrs):
                    lines.append('|%s%s="%s"' % (attr_pad, attr_name, attr_value))

        # Recurse into children (if any) one level deeper.
        if hasattr(node, "contents"):
            child_depth = depth + 2
            for child in node.contents:
                walk(child, child_depth)

    walk(element, 0)

    return "\n".join(lines)


问题


面经


文章

微信
公众号

扫码关注公众号