Python tostring() usage examples
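
The snippets below are collected from real projects and show how lxml's tostring() serializes a parsed tree back into markup. As a quick orientation, here is a minimal sketch of the round trip these examples rely on; the sample markup and variable names are made up for illustration:

from lxml import etree, html

# parse a small HTML fragment (made-up sample markup)
root = html.fromstring('<div><p class="intro">Hello <b>world</b></p></div>')

# serialize it back; tostring() returns bytes unless encoding='unicode' is passed
print(html.tostring(root, pretty_print=True, method='html').decode('utf-8'))
print(html.tostring(root, encoding='unicode'))

# etree.tostring() does the same for XML trees and can emit an XML declaration
xml_root = etree.fromstring('<root><item>1</item></root>')
print(etree.tostring(xml_root, encoding='utf-8', xml_declaration=True).decode('utf-8'))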

lxml_test.py (project: base_function, author: Rockyzsu)
# imports required by this example:
import urllib2
from lxml import etree

def lxml_test():
    url = "http://www.caixunzz.com"
    req = urllib2.Request(url=url)
    resp = urllib2.urlopen(req)
    #print resp.read()
    '''
    parse_body=html.fromstring(resp.read())
    href=parse_body.xpath('//a[@class="label"]/@href')
    print href
    #not working from above
    '''

    tree = etree.HTML(resp.read())
    href = tree.xpath('//a[@class="label"]/@href')
    #print href.tag
    for i in href:
        #print html.tostring(i)
        #print type(i)
        print i

    print type(href)

#not working yet
store.py (project: BlogSpider, author: hack4code)
def process_item(self, item, spider):
        if item is not None:
            doc = item['content']
            if not isinstance(doc,
                              (str, bytes)):
                if isinstance(doc,
                              HtmlElement):
                    item['content'] = tostring(doc,
                                               encoding='UTF-8',
                                               pretty_print=True,
                                               method='html')
                    item['encoding'] = 'UTF-8'
                else:
                    raise Exception((
                        'Error in store pipeline unsupported doc type[{}]'
                        ).format(doc.__class__.__name__))

            item_ = dict(item)
            item_['lang'] = get_article_lang(item)
            item_['spider'] = spider._id
            item_['source'] = spider.title
            item_['category'] = get_category(item_)
            if not is_exists_article(item_):
                save_article(item_)
        return item
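
One detail worth noting in the snippet above: when tostring() is given a codec name such as 'UTF-8' it returns bytes, which is presumably why the pipeline stores the encoding alongside the serialized content. A minimal sketch of that behaviour (the element here is a made-up example, not part of the spider):

from lxml import html

doc = html.fromstring('<p>hello</p>')
as_bytes = html.tostring(doc, encoding='UTF-8', method='html')    # returns bytes
as_text = html.tostring(doc, encoding='unicode', method='html')   # returns str
assert isinstance(as_bytes, bytes) and isinstance(as_text, str)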
test_views.py (project: gooderp_org, author: osbzr)
def setUp(self):
        super(TestViewSaving, self).setUp()
        self.arch = h.DIV(
            h.DIV(
                h.H3("Column 1"),
                h.UL(
                    h.LI("Item 1"),
                    h.LI("Item 2"),
                    h.LI("Item 3"))),
            h.DIV(
                h.H3("Column 2"),
                h.UL(
                    h.LI("Item 1"),
                    h.LI(h.SPAN("My Company", attrs(model='res.company', id=1, field='name', type='char'))),
                    h.LI(h.SPAN("+00 00 000 00 0 000", attrs(model='res.company', id=1, field='phone', type='char')))
                ))
        )
        self.view_id = self.registry('ir.ui.view').create(self.cr, self.uid, {
            'name': "Test View",
            'type': 'qweb',
            'arch': ET.tostring(self.arch, encoding='utf-8').decode('utf-8')
        })
xml.py (project: ingestors, author: alephdata)
def ingest(self, file_path):
        """Ingestor implementation."""
        file_size = self.result.size or os.path.getsize(file_path)
        if file_size > self.MAX_SIZE:
            raise ProcessingException("XML file is too large.")

        try:
            doc = etree.parse(file_path)
        except (ParserError, ParseError):
            raise ProcessingException("XML could not be parsed.")

        text = self.extract_html_text(doc.getroot())
        transform = etree.XSLT(self.XSLT)
        html_doc = transform(doc)
        html_body = html.tostring(html_doc,
                                  encoding='unicode',
                                  pretty_print=True)
        self.result.flag(self.result.FLAG_HTML)
        self.result.emit_html_body(html_body, text)
TestReport.py (project: WebAutomaiton, author: AlvinXuCH)
def WriteHTML(self,testcaseinfo):

        self.CreateHtmlFile()

        f = open(self.reportfile,"r")

        htmlcontent = f.read()
        f.close()
        #tree = mytree.fromstring(str(htmlcontent))
        htmlcontent = htmlcontent.encode('utf-8')  # keep the encoded bytes; encode() does not modify in place
        tree = html.fromstring(htmlcontent)
        tableElem = tree.find(".//table")
        if testcaseinfo.result == "Failed":
            mytablerow = "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td bgcolor=\"#FF0000\">{3}</td><td>{4}</td><td>{5}</td><td>{6}</td><td>{7}</td></tr>".format(testcaseinfo.id,testcaseinfo.name,testcaseinfo.owner,testcaseinfo.result,testcaseinfo.starttime,testcaseinfo.endtime,testcaseinfo.secondsDuration,testcaseinfo.errorinfo)
        else:
            mytablerow = "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td><td>{7}</td></tr>".format(testcaseinfo.id,testcaseinfo.name,testcaseinfo.owner,testcaseinfo.result,testcaseinfo.starttime,testcaseinfo.endtime,testcaseinfo.secondsDuration,testcaseinfo.errorinfo)
        tableElem.append(mytree.HTML(str(mytablerow)))

        f = open(self.reportfile,"w")
        #html.tostring
        newContent = repr(html.tostring(tree,method="html",with_tail=False))
        newContent = newContent.replace(r"\n","").replace(r"\t","").replace('b\'',"")
        newContent = newContent[:len(newContent)-1]
        f.write(newContent)
        f.close()
parsers.py (project: crestify, author: crestify)
def __init__(self, file_name, user_id):
        with open(file_name, 'r') as self.opened_file:
            #  So Instapaper doesn't close <li> tags
            #  This was causing infinite recursion when using BS directly
            #  Hence why the stuff below is being done, so that the <li> tags get closed
            self.html = html.document_fromstring(self.opened_file.read())
            self.html = html.tostring(self.html)
        self.soup = BeautifulSoup4(self.html)
        self.user = user_id
        self.urls = dict()
        self.check_duplicates = dict()
        self.check_duplicates_query = Bookmark.query.filter(Bookmark.user == self.user,
                                                            Bookmark.deleted == False).all()
        for bmark in self.check_duplicates_query:
            self.check_duplicates[bmark.main_url] = bmark
        self.tags_dict = dict()
        self.tags_set = set()
        self.valid_url = re.compile(
            r'^(?:[a-z0-9\.\-]*)://'
            r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|'
            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'
            r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'
            r'(?::\d+)?'
            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
clean_input.py (project: idealoom, author: conversence)
def _sanitize_html_frags(html_value, valid_tags, valid_attributes):
    fragments = html.fragments_fromstring(html_value)
    for f in fragments:
        if isinstance(f, html.HtmlElement):
            _sanitize_html_rec(f, valid_tags, valid_attributes)
            if f.tag in valid_tags:
                _clean_attributes(f, valid_attributes)
                yield html.tostring(f, encoding="unicode")
            else:
                if f.text:
                    yield f.text
                for sub in f:
                    yield html.tostring(sub, encoding="unicode")
                if f.tail:
                    yield f.tail
                if f.tag in ('p', 'br'):
                    yield '\n'
        else:
            yield f
__init__.py (project: calibre_dangdang, author: qunxyz)
def totext(self, elem):
        return self.tostring(elem, encoding=unicode, method='text').strip()
__init__.py (project: calibre_dangdang, author: qunxyz)
def parse_results_page(self, root):  # {{{
        from lxml.html import tostring

        matches = []

        def title_ok(title):
            title = title.lower()
            bad = ['bulk pack', '[audiobook]', '[audio cd]', '(a book companion)', '( slipcase with door )', ': free sampler']
            for x in bad:
                if x in title:
                    return False
            # if title and title[0] in '[{' and re.search(r'\(\s*author\s*\)', title) is not None:
            #     # Bad entries in the catalog
            #     return False
            return True

        for a in root.xpath(r'//li[starts-with(@class, "line")]//a[@href and contains(@name, "itemlist-picture")]'):
            # title = a.get('title')
            # if title_ok(title):
            url = a.get('href')
            if url.startswith('/'):
                url = 'http://product.dangdang.com/%s' % (url)
            matches.append(url)

        # Keep only the top 5 matches as the matches are sorted by relevance by
        # Amazon so lower matches are not likely to be very relevant
        return matches[:5]
    # }}}
cleanhtml.py (project: zing, author: evernote)
def url_trim(html):
    """Trims anchor texts that are longer than 70 chars."""
    fragment = fromstring(html)
    for el, attrib_, link_, pos_ in fragment.iterlinks():
        new_link_text = trim_url(el.text_content())
        el.text = new_link_text

    return mark_safe(tostring(fragment, encoding=unicode))
lxml_test.py (project: base_function, author: Rockyzsu)
# import required by this example:
from lxml import etree

def lxml_case3():
    text = '''
    <div>
        <ul>
             <li class="item-0"><a href="link1.html">first item</a></li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-inactive"><a href="link3.html">third item><span>Hello world</span></a></li>
             <li class="item-1"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a>
             <li class="de-item-0"><a href="link5.html">fifth item</a>
         </ul>
     </div>
    '''

    tree=etree.HTML(text)
    html_s=etree.tostring(tree)
    #print html_s
    #print tree.xpath('//li//span/text()')[0]
    '''
    reg_case=tree.xpath('//*[starts-with(@class,"item")]')
    for i in reg_case:
        print i.xpath('.//a/@href')
    '''
    # EXSLT regular expressions require an explicit namespace mapping
    result=tree.xpath(r'//*[re:match(@class, "item-0")]',
                      namespaces={'re': 'http://exslt.org/regular-expressions'})
    print result

    for i in result[0]:
        print i.xpath('.//a/@href')
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_body(self):
            html = '''<body><p>test</p></body>'''
            res = b'''<html><body><p>test</p></body></html>'''
            tree = self.soupparser.fromstring(html)
            self.assertEqual(tostring(tree), res)
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_head_body(self):
            # HTML tag missing, parser should fix that
            html = '<head><title>test</title></head><body><p>test</p></body>'
            res = b'<html><head><title>test</title></head><body><p>test</p></body></html>'
            tree = self.soupparser.fromstring(html)
            self.assertEqual(tostring(tree), res)
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_wrap_html(self):
            # <head> outside <html>, parser should fix that
            html = '<head><title>title</test></head><html><body/></html>'
            res = b'<html><head><title>title</title></head><body></body></html>'
            tree = self.soupparser.fromstring(html)
            self.assertEqual(tostring(tree), res)
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_comment_pi(self):
            html = '''<!-- comment -->
<?test asdf?>
<head><title>test</title></head><body><p>test</p></body>
<!-- another comment -->'''
            res = b'''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<!-- comment --><?test asdf?><html><head><title>test</title></head><body><p>test</p></body></html><!-- another comment -->'''
            tree = self.soupparser.fromstring(html).getroottree()
            self.assertEqual(tostring(tree, method='html'), res)
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_doctype1(self):
            # Test document type declaration, comments and PI's
            # outside the root
            html = \
'''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<!--another comment--><html><head><title>My first HTML document</title></head><body><p>Hello world!</p></body></html><?foo bar>'''

            res = \
b'''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<!--another comment--><html><head><title>My first HTML document</title></head><body><p>Hello world!</p></body></html><?foo bar?>'''

            tree = self.soupparser.fromstring(html).getroottree()
            self.assertEqual(tree.docinfo.public_id, "-//W3C//DTD HTML 4.01//EN")
            self.assertEqual(tostring(tree), res)
test_elementsoup.py (project: Taigabot, author: FrozenPigs)
def test_doctype_html5(self):
            # html 5 doctype declaration
            html = b'<!DOCTYPE html>\n<html lang="en"></html>'

            tree = self.soupparser.fromstring(html).getroottree()
            self.assertTrue(tree.docinfo.public_id is None)
            self.assertEqual(tostring(tree), html)
proceedings_xml.py (project: europarl, author: chozelinek)
def get_language(self, s_intervention, p, i_lang, new_paragraphs):
        language = p.xpath('.//span[@class="italic"][text()[re:test(.,"^[\xad\s\.—–\-?,\(]*({})[\xad\s\.—–\-?,\)]*")]]'.format('|'.join(self.langs)), namespaces=self.ns)
        if len(language) > 0 and not self.explanations_of_vote.match(language[0].text):
            lang = re.match(
                r'.*({}).*'.format('|'.join(self.langs)),
                language[0].text)
            output = lang.group(1)
            for l in language:
                l.drop_tree()
        else:
            p = html.tostring(p, with_tail=True, encoding='utf-8').decode('utf-8')
            lang_in_text = re.search(
                r'\(({})\)'.format('|'.join(self.langs)),
                p)
            if lang_in_text is not None:
                output = lang_in_text.group(1)
                p = re.sub(r'\(({})\) *'.format('|'.join(self.langs)), r'', p)
            else:
                if len(new_paragraphs) == 0:
                    if 'role' in s_intervention.keys():
                        president_pattern = '|'.join(self.loc['president'])
                        if re.match(r'{}\Z'.format(president_pattern), s_intervention['role']):
                            output = 'unknown'
                        else:
                            if i_lang is None:
                                output = self.language.upper()
                            else:
                                output = i_lang
                    else:
                        if i_lang is None:
                            output = self.language.upper()
                        else:
                            output = i_lang
                else:
                    output = new_paragraphs[-1]['language']
            p = html.fromstring(p)
        return output, p
proceedings_xml.py (project: europarl, author: chozelinek)
def serialize(self, infile, root):
        ofile_name = os.path.splitext(os.path.basename(infile))[0]
        ofile_path = os.path.join(self.outdir, ofile_name+'.xml')
        xml = etree.tostring(
            root,
            encoding='utf-8',
            xml_declaration=True,
            pretty_print=True).decode('utf-8')
        with open(ofile_path, mode='w', encoding='utf-8') as ofile:
            ofile.write(xml)
        pass
meps_ie.py (project: europarl, author: chozelinek)
def serialize(self, infile, root):
        ofile_name = os.path.splitext(os.path.basename(infile))[0]
        ofile_path = os.path.join(self.outdir, ofile_name+'.xml')
        xml = etree.tostring(
            root,
            encoding='utf-8',
            xml_declaration=True,
            pretty_print=True).decode('utf-8')
        with open(ofile_path, mode='w', encoding='utf-8') as ofile:
            ofile.write(xml)
        pass
meps_ie.py (project: europarl, author: chozelinek)
def get_name(self, tree):
        name = tree.xpath('//li[@class="mep_name"]')[0]
        name = self.rm_a.clean_html(name)
        name = html.tostring(name).decode('utf-8')
        name = re.sub(r'[\t\n]', r'', name)
        name = name.split('<br>')
        name = [html.fromstring(x).text_content() for x in name]
        name = ' '.join(name)
        return name
html_to_telegraph.py (project: html-telegraph-poster, author: mercuree)
def convert_html_to_telegraph_format(html_string, clean_html=True, output_format="json_string"):
    if clean_html:
        html_string = clean_article_html(html_string)

        body = preprocess_fragments(
            _fragments_from_string(html_string)
        )
        if body is not None:
            desc = [x for x in body.iterdescendants()]
            for tag in desc:
                preprocess_media_tags(tag)
            move_to_top(body)
            post_process(body)
    else:
        fragments = _fragments_from_string(html_string)
        body = fragments[0].getparent() if len(fragments) else None

    content = []
    if body is not None:
        content = [_recursive_convert(x) for x in body.iterchildren()]

    if output_format == 'json_string':
        return json.dumps(content, ensure_ascii=False)
    elif output_format == 'python_list':
        return content
    elif output_format == 'html_string':
        return html.tostring(body, encoding='unicode')
ir_qweb.py (project: gooderp_org, author: osbzr)
def from_html(self, cr, uid, model, field, element, context=None):
        content = []
        if element.text: content.append(element.text)
        content.extend(html.tostring(child)
                       for child in element.iterchildren(tag=etree.Element))
        return '\n'.join(content)
test_views.py (project: gooderp_org, author: osbzr)
def test_save(self):
        Company = self.registry('res.company')
        View = self.registry('ir.ui.view')

        replacement = ET.tostring(h.DIV(
            h.H3("Column 2"),
            h.UL(
                h.LI("wob wob wob"),
                h.LI(h.SPAN("Acme Corporation", attrs(model='res.company', id=1, field='name', expression="bob", type='char'))),
                h.LI(h.SPAN("+12 3456789", attrs(model='res.company', id=1, field='phone', expression="edmund", type='char'))),
            )
        ), encoding='utf-8')
        View.save(self.cr, self.uid, res_id=self.view_id, value=replacement,
                  xpath='/div/div[2]')

        company = Company.browse(self.cr, self.uid, 1)
        self.assertEqual(company.name, "Acme Corporation")
        self.assertEqual(company.phone, "+12 3456789")
        self.eq(
            ET.fromstring(View.browse(self.cr, self.uid, self.view_id).arch.encode('utf-8')),
            h.DIV(
                h.DIV(
                    h.H3("Column 1"),
                    h.UL(
                        h.LI("Item 1"),
                        h.LI("Item 2"),
                        h.LI("Item 3"))),
                h.DIV(
                    h.H3("Column 2"),
                    h.UL(
                        h.LI("wob wob wob"),
                        h.LI(h.SPAN({'t-field': "bob"})),
                        h.LI(h.SPAN({'t-field': "edmund"}))
                    ))
            )
        )
test_views.py (project: gooderp_org, author: osbzr)
def test_save_only_embedded(self):
        Company = self.registry('res.company')
        company_id = 1
        Company.write(self.cr, self.uid, company_id, {'name': "Foo Corporation"})

        node = html.tostring(h.SPAN(
            "Acme Corporation",
            attrs(model='res.company', id=company_id, field="name", expression='bob', type='char')))

        self.registry('ir.ui.view').save(self.cr, self.uid, res_id=company_id,value=node)

        company = Company.browse(self.cr, self.uid, company_id)
        self.assertEqual(company.name, "Acme Corporation")
test_views.py (project: gooderp_org, author: osbzr)
def test_field_tail(self):
        View = self.registry('ir.ui.view')
        replacement = ET.tostring(
            h.LI(h.SPAN("+12 3456789", attrs(
                        model='res.company', id=1, type='char',
                        field='phone', expression="edmund")),
                 "whop whop"
        ), encoding="utf-8")
        View.save(self.cr, self.uid, res_id = self.view_id, value=replacement,
                  xpath='/div/div[2]/ul/li[3]')

        self.eq(
            ET.fromstring(View.browse(self.cr, self.uid, self.view_id).arch.encode('utf-8')),
            h.DIV(
                h.DIV(
                    h.H3("Column 1"),
                    h.UL(
                        h.LI("Item 1"),
                        h.LI("Item 2"),
                        h.LI("Item 3"))),
                h.DIV(
                    h.H3("Column 2"),
                    h.UL(
                        h.LI("Item 1"),
                        h.LI(h.SPAN("My Company", attrs(model='res.company', id=1, field='name', type='char'))),
                        h.LI(h.SPAN({'t-field': "edmund"}), "whop whop"),
                    ))
            )
        )
utils.py (project: danube-delta, author: honzajavorek)
def modify_html(content, prop='_content'):
    html_string = getattr(content, prop)
    html_tree = html.fromstring(html_string)

    yield html_tree

    html_string = html.tostring(html_tree, encoding='unicode')
    html_string = re.sub(r'%7B(\w+)%7D', r'{\1}', html_string)
    html_string = re.sub(r'%7C(\w+)%7C', r'|\1|', html_string)
    setattr(content, prop, html_string)
mf.py (project: krauler, author: occrp-attic)
def get_content(self, page, meta):
        if not page.is_html:
            return page.content

        check_path = self.config.data.get('check_path')
        if check_path is not None:
            if page.doc.find(check_path) is None:
                log.info("Failed XML path check: %r", page.url)
                return None

        for meta_el in ['title', 'author', 'date']:
            path = self.config.data.get('%s_path' % meta_el)
            if path is not None and page.doc.findtext(path):
                meta[meta_el] = page.doc.findtext(path)

        if 'date' in meta:
            try:
                date = meta.pop('date')
                date = parse(date)
                if 'dates' not in meta:
                    meta['dates'] = []
                meta['dates'].append(date.isoformat())
            except Exception as ex:
                log.exception(ex)

        body = page.doc
        if self.config.data.get('body_path') is not None:
            body = page.doc.find(self.config.data.get('body_path'))

        for path in self.config.data.get('remove_paths', []):
            for el in body.findall(path):
                el.drop_tree()

        return html.tostring(body)
movie_info.py (project: xcrawler, author: 0xE8551CCB)
def parse_movie_details(self, response):
        html_root = html.fromstring(response.content,
                                    base_url=response.base_url)

        movie_info = dict()
        movie_info['??'] = self.xpath_first(html_root,
                                            '//div[@id="content"]'
                                            '/h1/span[1]/text()').strip()

        try:
            # to pure text
            soup = BeautifulSoup(html.tostring(
                self.xpath_first(html_root,
                                 '//div[@id="info"]')), 'html')
        except TypeError:
            return None
        else:
            for line in soup.get_text().splitlines():
                try:
                    left, *right = line.split(':')
                except AttributeError:
                    pass
                else:
                    key = left.strip()
                    value = ''.join(x.strip() for x in right)

                    if key and value:
                        movie_info[key] = value

            yield movie_info
tests.py (project: nom, author: frnsys)
def test_convert_spans(self):
        expected = '''
            <p>
                <em><strong>
                    foobar
                    <em>
                        lala
                        <strong>
                            yum
                        </strong>
                    </em>
                    <span>
                        hey hey
                    </span>
                    <strong>
                        uh oh
                    </strong>
                    <span>
                        yes
                    </span>
                </strong></em>
            </p>
        '''

        h = fromstring(html)
        for span in h.findall('.//span'):
            html2md.convert_span(span)
        result = tostring(h).decode('utf-8')

        results = [x.replace('\n', '').replace(' ', '') for x in [result, expected]]
        print('=========')
        print(results[0])
        print('=========')
        print(results[1])
        self.assertEqual(results[0], results[1])

