def latest_content(url):
    """
    Fetch a news article page and return the plain text of its body.

    The ``<p>`` elements directly under ``<div id="artibody">`` are
    serialized, re-parsed as a single HTML fragment and reduced to their
    text content, with ASCII spaces removed.

    Parameter
    --------
        url: location of the article page; passed straight to
            ``lxml.html.parse``.

    Return
    --------
        string: text of the article body with spaces removed, or ``None``
            when any step raises (the error message is printed).
    """
    try:
        page = lxml.html.parse(url)
        paragraphs = page.xpath('//div[@id="artibody"]/p')
        if ct.PY3:
            # tostring() yields bytes here, so decode before joining.
            chunks = [etree.tostring(p).decode('utf-8') for p in paragraphs]
        else:
            chunks = [etree.tostring(p) for p in paragraphs]
        fragment = lxml.html.fromstring(''.join(chunks))
        # Strip spaces from the extracted text, not from the serialized
        # markup: removing them from the markup also deletes the separator
        # between a tag name and its attributes (`<p class=..>` would
        # become `<pclass=..>`) before the fragment is re-parsed.
        # NOTE(review): full-width spaces (U+3000) are not removed here;
        # confirm whether dropping them was the original intent.
        return fragment.text_content().replace(' ', '')
    except Exception as er:
        # Deliberate best-effort: report the failure and hand back None
        # instead of propagating the error to the caller.
        print(str(er))
        return None
评论列表
文章目录