def _guba_content(url):
    """Scrape body text, post time and reply count from the page at ``url``.

    The document is parsed with lxml and three values are read from it:

    * the text of every ``<p>`` directly under ``div[@class="ilt_p"]``,
      concatenated, with ideographic spaces (U+3000) removed;
    * the first text node of the ``span`` under
      ``div[@class="fl_left iltp_time"]``;
    * the first parenthesised value found in the second ``span`` under
      ``div[@class="fl_right iltp_span"]``.

    Args:
        url: Location handed to ``lxml.html.parse``.

    Returns:
        A three-item list ``[content, ptime, rcounts]``. This is a
        best-effort scraper: if anything fails (parse error, a missing
        node, no paragraphs, no parenthesised count) the fallback
        ``['', '', '0']`` is returned instead of raising.
    """
    try:
        html = lxml.html.parse(url)
        paragraphs = html.xpath('//div[@class=\"ilt_p\"]/p')
        if ct.PY3:
            # tostring() yields bytes on Python 3; decode so the pieces
            # can be joined as text.
            fragments = [etree.tostring(node).decode('utf-8')
                         for node in paragraphs]
        else:
            fragments = [etree.tostring(node) for node in paragraphs]

        # Re-parse the joined paragraphs so text_content() flattens any
        # nested markup into plain text.
        html_content = lxml.html.fromstring(''.join(fragments))

        # Strip the padding from the extracted *text*, not from the
        # serialized markup: tostring() defaults to ASCII output, where a
        # U+3000 appears as a character reference and a literal can never
        # match, and replacing plain spaces in markup would glue tag names
        # to their attributes.
        # NOTE(review): assumes the character being stripped is the
        # ideographic space U+3000 -- confirm against the upstream source.
        content = html_content.text_content().replace(u'\u3000', u'')

        ptime = html.xpath(
            '//div[@class=\"fl_left iltp_time\"]/span/text()')[0]
        rcounts_text = html.xpath(
            '//div[@class=\"fl_right iltp_span\"]/span[2]/text()')[0]
        # Keep only what sits inside the first pair of parentheses.
        rcounts = re.findall(r'\((.*?)\)', rcounts_text)[0]
        return [content, ptime, rcounts]
    except Exception:
        # Deliberate catch-all: callers receive placeholders rather than
        # an exception when a page cannot be scraped.
        return ['', '', '0']
评论列表
文章目录