def extract_news(news_url):
    """Download a news page and return the text selected by GET_CNN_NEWS_XPATH.

    Args:
        news_url: URL of the page to fetch.

    Returns:
        The XPath results concatenated into a single string, or an empty
        dict ``{}`` when parsing/extraction fails. The ``{}`` sentinel is
        kept for backward compatibility with existing callers.

    Raises:
        Whatever the HTTP request itself raises; only the parsing and
        extraction steps are guarded, exactly as before.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import logging

    # Fetch html. The context manager closes the session (and its pooled
    # connections) as soon as the response body has been downloaded.
    with requests.session() as session_requests:
        response = session_requests.get(news_url, headers=getHeaders())

    try:
        # Parse html
        tree = html.fromstring(response.content)
        # Extract information
        fragments = tree.xpath(GET_CNN_NEWS_XPATH)
        # NOTE(review): join() assumes the XPath yields strings (e.g. a
        # text() selection) - confirm against GET_CNN_NEWS_XPATH.
        return ''.join(fragments)
    except Exception:
        # Best-effort extraction: record the failure with its traceback
        # instead of silently discarding it, then return the sentinel.
        logging.getLogger(__name__).exception(
            "Failed to extract news from %s", news_url
        )
        return {}
# Comment list
# Table of contents