def get_article_info(url):
    """
    Fetch the article page at ``url`` and return its metadata as a dictionary.

    The page is opened with ``urllib2.urlopen``, parsed with ``html.parse``
    and queried with XPath expressions.

    The dictionary contains the following fields:
    - date: first text node of a link inside a ``<time>`` element, stripped
    - title: first text node of the ``<h1 id="articulo-titulo">`` element
    - tags: list of link texts inside ``<li itemprop="keywords">`` items
      (an empty list when the page has none)
    - url: the ``url`` argument, unchanged

    Raises IndexError when the page has no matching title or date node.
    Errors raised while opening or parsing the page propagate to the caller.
    """
    content = urllib2.urlopen(url)
    try:
        tree = html.parse(content)
    finally:
        # Close the response even when parsing fails, so it is never leaked.
        content.close()

    # Title and date are mandatory: indexing [0] raises IndexError if absent.
    title = tree.xpath('//h1[@id="articulo-titulo"]/text()')[0]
    date = tree.xpath('//time//a/text()')[0].strip()
    # Tags are optional: xpath() yields an empty list when nothing matches.
    tags = tree.xpath('//li[@itemprop="keywords"]/a/text()')

    return {'date': date, 'title': title, 'tags': tags, 'url': url}
评论列表
文章目录