def clone_url(url, timeout=None):
    """Download a page and return its HTML with a ``<base>`` tag injected.

    The ``<base href=...>`` tag makes relative links in the returned markup
    resolve against the original site, so the copy renders when served
    from somewhere else.

    :param url: address of the page to clone; ``http://`` is prepended when
        the value contains no ``://`` scheme separator
    :param timeout: optional timeout forwarded to ``requests.get``; the
        default ``None`` keeps the previous behaviour of waiting indefinitely
    :return: the decoded HTML as ``str``, with a ``<base>`` tag added right
        after the opening ``<head>`` tag unless the page already has one
    """
    # Function-scope imports keep the block self-contained (stdlib only).
    import re
    from html import escape

    if '://' not in url:
        url = 'http://' + url

    r = requests.get(url, timeout=timeout)
    raw = r.content

    # We don't trust requests' encoding guess, so cchardet inspects the raw
    # bytes instead (without it, pages such as baidu.com fail to decode).
    # cchardet may report ``None`` (e.g. for an empty body), so fall back to
    # the encoding requests derived from the headers, then to UTF-8.
    encoding = cchardet.detect(raw)['encoding'] or r.encoding or 'utf-8'
    try:
        # errors='replace': a misdetected byte should not abort the clone.
        page = raw.decode(encoding, errors='replace')
    except LookupError:
        # The reported encoding name is unknown to Python's codec registry.
        page = raw.decode('utf-8', errors='replace')

    # Set the relative-URL rule, unless the page already declares a base.
    # ``\b`` keeps ``<basefont>`` and ``<header>`` from matching.
    if not re.search(r'<base\b', page, flags=re.IGNORECASE):
        # Escape the URL because it ends up inside a quoted HTML attribute.
        base_tag = '<base href="%s" />' % escape(url, quote=True)
        # Match ``<head>`` in any case and with attributes, and only the
        # first one. A callable replacement avoids backslash/group
        # interpretation of the URL text by ``re.sub``.
        page = re.sub(
            r'<head\b[^>]*>',
            lambda m: m.group(0) + base_tag,
            page,
            count=1,
            flags=re.IGNORECASE,
        )
    return page
评论列表
文章目录