def gethtml(url):
    """Fetch *url* and return its body, status code and final URL.

    The request is sent with browser-like ``User-Agent`` and
    ``Accept-Language`` headers, the target URL as its own ``Referer``,
    and a 3 second timeout.

    Returns a dict with three keys:

    * ``"html"`` -- the response body as a UTF-8 encoded byte string.
      A body that is already valid UTF-8 is returned untouched; anything
      else is decoded as GBK (undecodable bytes dropped) and re-encoded
      as UTF-8.
    * ``"code"`` -- the status code reported by the response object.
    * ``"url"``  -- the URL reported by the response object (``geturl()``).

    Errors are reported through the same dict rather than raised:

    * ``urllib2.HTTPError`` -- the error's own body (unconverted), code
      and URL are returned; if reading the body fails, ``"html"`` is ``''``.
    * any other ``Exception`` -- ``{"html": "", "code": 404, "url": url}``.
      The 404 here is a generic "fetch failed" marker, not a real
      server response.
    """
    try:
        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0')
        request.add_header('Accept-Language', 'en-us;q=0.5,en;q=0.3')
        request.add_header('Referer', request.get_full_url())
        u = urllib2.urlopen(request, timeout=3)
        try:
            content = u.read()
            code = u.code
            final_url = u.geturl()
        finally:
            # Always release the underlying connection, even if read() fails.
            u.close()
        try:
            # Validation only: if the bytes are already well-formed UTF-8
            # they are returned as-is. (Calling .encode() on a byte string
            # would force an implicit ASCII decode and reject every
            # non-ASCII page, including valid UTF-8 ones.)
            content.decode("utf-8")
        except UnicodeDecodeError:
            content = content.decode('gbk', 'ignore').encode("utf-8", 'ignore')
        return {"html": content, "code": code, "url": final_url}
    except urllib2.HTTPError as e:
        try:
            return {"html": e.read(), "code": e.code, "url": e.geturl()}
        except Exception:
            # The error body could not be read; keep the code and URL.
            return {"html": '', "code": e.code, "url": e.geturl()}
    except Exception:
        # Best-effort contract: any other failure (bad URL, timeout,
        # connection error, ...) is reported as a 404-style result.
        return {"html": "", "code": 404, "url": url}
# [web-page residue] Comment list
# [web-page residue] Article table of contents