def get_title_by_url(url, timeout=5, pattern='<title>(.*?)</title>'):
    """Fetch ``url`` and extract the page title with a regular expression.

    Args:
        url: Address passed to ``requests.get``.
        timeout: Timeout value forwarded to ``requests.get``.
        pattern: Regular expression run with ``re.findall`` against the
            decoded response text; the first result is used as the title.

    Returns:
        ``{url: title}`` with the first match of ``pattern``,
        ``{url: None}`` when the page contains no match, or ``None`` when
        the request fails with a connection error or a timeout.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout):
        # The exception types must be listed as a tuple: ``A or B`` evaluates
        # to ``A`` alone, which left read timeouts (not a ConnectionError)
        # uncaught even though a timeout is explicitly requested above.
        logger_util.log_debug('Connect failed to %s ' % url)
        return None

    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding

    # NOTE: the default pattern is case-sensitive and ``.`` does not match
    # newlines, so ``<TITLE>`` tags or multi-line titles are reported as missing.
    matches = re.findall(pattern, response.text)
    if not matches:
        logger_util.log_debug('This page do not have title %s' % url)
        return {url: None}
    return {url: matches[0]}
# ----------------------------------------------------------------------
评论列表
文章目录