python类Doctype()的实例源码

EasyLogin.py 文件源码 项目:cc98 作者: zjuchenyuan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def text(self, target=None, ignore_pureascii_words=False):
        """
        Collect the visible text fragments of an HTML tree.

        Every descendant of ``target`` is visited and each stripped, non-empty
        text node is kept. The following are skipped: doctype and comment
        nodes, text directly inside ``<script>``/``<style>``, and text whose
        direct parent carries an inline style of ``display:none`` or a zero
        ``font-size``.

        :param target: the BeautifulSoup object to scan, default self.b
        :param ignore_pureascii_words: if set True, only return fragments that
            contain at least one non-ASCII character, e.g. Chinese characters
            (may be useful for English version website)
        :return: list of str (each fragment goes through ``encode()`` when
            PY2 is truthy)
        """
        from bs4 import Comment
        from bs4.element import NavigableString, Doctype

        def hides_text(style):
            # Inspect the inline style one declaration at a time. A plain
            # substring search for "none" would also fire on unrelated
            # declarations such as "text-decoration:none" or "border:none"
            # and drop text that is actually visible.
            for declaration in style.split(";"):
                prop, _, value = declaration.partition(":")
                # Ignore a trailing "!important" and any surrounding spaces.
                tokens = value.lower().split("!")[0].split()
                if not tokens:
                    continue
                prop = prop.strip().lower()
                if prop == "display" and tokens[0] == "none":
                    return True
                if prop == "font-size" and tokens[0] in ("0", "0px"):
                    return True
            return False

        if target is None:
            target = self.b

        result = []
        for descendant in target.descendants:
            # Only text nodes carry text; tags are walked via .descendants.
            if not isinstance(descendant, NavigableString):
                continue
            # Doctype and Comment are NavigableString subclasses, not content.
            if isinstance(descendant, (Doctype, Comment)):
                continue
            parent = descendant.parent
            if parent.name in ("script", "style"):
                continue
            if hides_text(parent.get("style", "")):
                continue
            data = descendant.strip()
            if not data:
                continue
            if ignore_pureascii_words and not any(ord(ch) > 127 for ch in data):
                continue
            result.append(data.encode() if PY2 else data)
        return result
EasyLogin.py 文件源码 项目:daily_notification 作者: zjuchenyuan 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def text(self, target=None, ignore_pureascii_words=False):
        """
        Collect the visible text fragments of an HTML tree.

        Every descendant of ``target`` is visited and each stripped, non-empty
        text node is kept. The following are skipped: doctype and comment
        nodes, text directly inside ``<script>``/``<style>``, and text whose
        direct parent carries an inline style of ``display:none`` or a zero
        ``font-size``.

        :param target: the BeautifulSoup object to scan, default self.b
        :param ignore_pureascii_words: if set True, only return fragments that
            contain at least one non-ASCII character, e.g. Chinese characters
            (may be useful for English version website)
        :return: list of str (each fragment goes through ``encode()`` when
            PY2 is truthy)
        """
        from bs4 import Comment
        from bs4.element import NavigableString, Doctype

        def hides_text(style):
            # Inspect the inline style one declaration at a time. A plain
            # substring search for "none" would also fire on unrelated
            # declarations such as "text-decoration:none" or "border:none"
            # and drop text that is actually visible.
            for declaration in style.split(";"):
                prop, _, value = declaration.partition(":")
                # Ignore a trailing "!important" and any surrounding spaces.
                tokens = value.lower().split("!")[0].split()
                if not tokens:
                    continue
                prop = prop.strip().lower()
                if prop == "display" and tokens[0] == "none":
                    return True
                if prop == "font-size" and tokens[0] in ("0", "0px"):
                    return True
            return False

        if target is None:
            target = self.b

        result = []
        for descendant in target.descendants:
            # Only text nodes carry text; tags are walked via .descendants.
            if not isinstance(descendant, NavigableString):
                continue
            # Doctype and Comment are NavigableString subclasses, not content.
            if isinstance(descendant, (Doctype, Comment)):
                continue
            parent = descendant.parent
            if parent.name in ("script", "style"):
                continue
            if hides_text(parent.get("style", "")):
                continue
            data = descendant.strip()
            if not data:
                continue
            if ignore_pureascii_words and not any(ord(ch) > 127 for ch in data):
                continue
            result.append(data.encode() if PY2 else data)
        return result
elements.py 文件源码 项目:zpretty 作者: collective 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def is_doctype(self):
        """Return True when ``self.context`` is a ``Doctype`` instance."""
        node = self.context
        return isinstance(node, Doctype)
test_lxml.py 文件源码 项目:sack 作者: jofpin 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def _test_doctype(self, doctype_fragment):
        """Parse a document that starts with the given doctype and verify it.

        The fragment is wrapped in ``<!DOCTYPE ...>``, followed by a small
        paragraph, and parsed with ``self.soup``. The checks cover the type
        and value of the first node, the serialized prefix of the document,
        and the paragraph that follows the doctype.
        """
        expected_prefix = '<!DOCTYPE %s>' % doctype_fragment
        parsed = self.soup(expected_prefix + '<p>foo</p>')
        first_node = parsed.contents[0]

        # The first top-level node must be a Doctype holding the fragment.
        self.assertEqual(first_node.__class__, Doctype)
        self.assertEqual(first_node, doctype_fragment)

        # Serializing the document must reproduce the doctype verbatim.
        serialized = str(parsed)
        self.assertEqual(serialized[:len(expected_prefix)], expected_prefix)

        # The doctype must be attached to the tree without disturbing the
        # parsing of the markup that comes after it.
        self.assertEqual(parsed.p.contents[0], 'foo')


问题


面经


文章

微信
公众号

扫码关注公众号