def getHtml(url, tryEncode=True, followRedirect=True):
    """Fetch ``url`` and return the response body, or ``None`` on failure.

    Args:
        url: Passed unchanged to ``opener.open``.
        tryEncode: When true, look for a ``charset=`` parameter in the
            ``Content-Type`` response header. A body declared in any
            charset other than UTF-8 is decoded with that charset, and the
            result is then passed through ``strUniEncode``.
        followRedirect: When true, a default ``urllib2`` opener is used.
            When false, the opener is built with ``NoRedirection`` (below),
            which returns every response as-is instead of acting on its
            status code.

    Returns:
        The body read from the response (possibly transcoded, see
        ``tryEncode``), or ``None`` if opening or reading the URL raised.
    """

    class NoRedirection(urllib2.HTTPErrorProcessor):
        """Pass-through replacement for ``urllib2.HTTPErrorProcessor``.

        ``build_opener`` drops its default ``HTTPErrorProcessor`` when given
        a subclass of it, so with this class installed non-2xx responses
        (3xx redirects included) are not dispatched to the redirect/error
        handlers and come back to the caller as ordinary response objects.
        """

        def http_response(self, request, response):
            return response

        https_response = http_response

    if followRedirect:
        opener = urllib2.build_opener()
    else:
        opener = urllib2.build_opener(NoRedirection)

    page = None
    try:
        page = opener.open(url)
        pageHtml = page.read()
    except Exception:
        # Best-effort fetch: any network/HTTP/URL error yields None.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
    finally:
        # Release the connection on both the success and the failure path.
        if page is not None:
            page.close()
        opener.close()

    if tryEncode:
        # Headers stay available after the response has been closed.
        contentType = page.info().get('Content-Type') or ''
        # Accept quoted values, trailing parameters and any letter case,
        # e.g. 'text/html; Charset="windows-1251"; foo=bar'.
        found = re.search(r'charset\s*=\s*["\']?([\w.:+-]+)',
                          contentType, re.IGNORECASE)
        if found:
            charset = found.group(1).lower()
            if charset != 'utf-8':
                try:
                    pageHtml = pageHtml.decode(charset)
                except (LookupError, UnicodeError):
                    # Unknown charset name, or the body does not match the
                    # declared charset: keep the raw body.
                    pass
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this call was gated by ``tryEncode``; it is kept inside
        # the branch here -- confirm against the original file.
        # strUniEncode is defined elsewhere; presumably it turns a unicode
        # object back into a UTF-8 byte string -- TODO confirm.
        pageHtml = strUniEncode(pageHtml)

    return pageHtml
评论列表
文章目录