def sGet(self, url, ch='gbk', bt='solomon'):
    """Fetch *url* with a spoofed User-Agent and return the page text.

    Parameters:
        url: address to fetch.
        ch:  charset of the response body: 'gbk' (decoded leniently,
             errors ignored) or 'utf8' (strict). Any other value returns
             the raw bytes undecoded.
        bt:  key into the User-Agent table; unknown keys fall back to the
             default 'solomon' agent instead of raising KeyError.

    Returns the decoded page text (or raw bytes, see *ch*), or None when
    the request fails — the error is printed, keeping the original
    best-effort behavior.
    """
    bots = {
        "baidu": "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
        'google': "Googlebot/2.1 (+http://www.google.com/bot.html)",
        'solomon': "Solomon Net Vampire/1.0",
        'de': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:44.0) Gecko/20100101 Firefox/44.0"
    }
    headers = {
        #'Host': 'www.super-ping.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        # .get() with a fallback so an unknown bot key cannot raise KeyError
        'User-Agent': bots.get(bt, bots['solomon']),
        #'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6'
    }
    import cookielib  # function-local import kept from the original (lazy load)
    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # NOTE(review): install_opener mutates process-global state on every call;
    # preserved because other code in this file may rely on it — confirm.
    urllib2.install_opener(opener)
    # BUG FIX: the original never passed `headers`, so the User-Agent
    # spoofing above was dead code.
    req = urllib2.Request(url=url, headers=headers)
    try:
        resp = urllib2.urlopen(req)
        try:
            data = resp.read()
        finally:
            # Close even if read() raises, so the connection never leaks.
            resp.close()
        if ch == 'gbk':
            data = data.decode("gbk", 'ignore')
        elif ch == 'utf8':
            data = data.decode("utf-8")
        return data
    except IOError as e:
        # Best-effort: report and return None rather than propagate.
        print(e)
        return None
# (page-scrape artifacts, not code: "评论列表" = comment list,
#  "文章目录" = article table of contents — blog navigation text
#  accidentally captured with this snippet; commented out so the file parses)