def cache_resource(self, url):
    """Download ``url``, record it in the cache and return its content.

    The response's ``Content-Type`` header is mapped to a resource type
    through ``MIME_TYPES``.  The exact header value is tried first; if it is
    not a known key, the bare media type (parameters such as
    ``; charset=utf-8`` removed, lower-cased) is tried as a fallback.

    The body, together with the ``ETag`` and ``Last-Modified`` headers, is
    handed to ``self.update_resource_params`` under the key returned by
    ``self.get_resource_key(url)``.

    :param url: address of the resource to fetch.
    :returns: tuple ``(stream, resource_type)`` where ``stream`` is the raw
        response body and ``resource_type`` is the ``MIME_TYPES`` entry.
    :raises UnsupportedResourceFormat: if the content type has no entry in
        ``MIME_TYPES`` or the entry is falsy.

    Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    if self.proxy_url is not None:
        # NOTE: install_opener() replaces the process-wide default opener,
        # so this proxy also applies to later urllib2.urlopen() calls made
        # elsewhere.  Only the 'http' scheme is routed through the proxy.
        proxy = urllib2.ProxyHandler({'http': self.proxy_url})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    request = urllib2.Request(url)
    user_agent = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.35 Safari/535.1'
    request.add_header('User-Agent', user_agent)

    handler = urllib2.urlopen(request, timeout=self.http_timeout)
    # Python 2 urlopen() responses are not context managers, so close the
    # connection explicitly on every exit path (including the raises below).
    try:
        content_type = handler.headers.get('Content-Type')
        try:
            resource_type = MIME_TYPES[content_type]
        except KeyError:
            # Servers commonly append parameters ("text/html; charset=utf-8");
            # retry with just the media type before giving up.
            media_type = None
            if content_type:
                media_type = content_type.split(';', 1)[0].strip().lower()
            if not media_type or media_type == content_type:
                raise UnsupportedResourceFormat("Resource format not supported")
            try:
                resource_type = MIME_TYPES[media_type]
            except KeyError:
                raise UnsupportedResourceFormat("Resource format not supported")
        if not resource_type:
            raise UnsupportedResourceFormat("Resource format not found")

        etag = handler.headers.get('ETag')
        last_modified = handler.headers.get('Last-Modified')
        resource_key = self.get_resource_key(url)
        stream = handler.read()
    finally:
        handler.close()

    self.update_resource_params(resource_key, resource_type, etag, last_modified, stream)
    return stream, resource_type
评论列表
文章目录