def lxml_case3():
    """Print the link targets found under elements whose class matches a regex.

    Parses a small HTML fragment (two of its ``<li>`` elements are left
    unclosed) with ``etree.HTML``, selects every element whose ``class``
    attribute matches the pattern ``item-0`` via the EXSLT ``re:match`` XPath
    function, prints the list of matched elements, and then prints the
    ``href`` values of the ``<a>`` descendants of each matched element.

    Returns:
        None. All output is written to stdout.
    """
    text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item><span>Hello world</span></a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
<li class="de-item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
    tree = etree.HTML(text)

    # The ``re:`` prefix is not known to the XPath engine by default; it has
    # to be bound to the EXSLT regular-expressions namespace, otherwise the
    # query fails with an undefined-namespace-prefix error.
    exslt_namespaces = {"re": "http://exslt.org/regular-expressions"}
    # NOTE(review): the pattern is not anchored, so classes that merely
    # contain "item-0" (e.g. "de-item-0") are presumably selected too --
    # anchor it with ^...$ if only exact matches are wanted.
    result = tree.xpath(
        r'//*[re:match(@class, "item-0")]',
        namespaces=exslt_namespaces,
    )
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(result)

    # Walk every matched element. Iterating ``result[0]`` instead would visit
    # the children of the first match only (and raise IndexError when nothing
    # matched), so the relative ``.//a`` query would find no links.
    for element in result:
        print(element.xpath('.//a/@href'))
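# Comment list  (navigation text left over from the source web page; not code)
# Table of contents  (navigation text left over from the source web page; not code)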