def http_error_301(self, req, fp, code, msg, headers):
    """Handle a 301 response, tolerating a missing Location header.

    When ``headers.dict`` contains a ``location`` entry the redirect is
    delegated to urllib2.HTTPRedirectHandler.http_error_301. Otherwise the
    original response is wrapped with urllib.addinfourl (using the full URL
    of ``req``) so the caller still receives a response object.

    The HTTP status ``code`` is stored on the result as ``status`` unless
    the result already has a ``status`` attribute.

    Returns the response object produced by either branch.
    """
    # Membership test instead of the deprecated dict.has_key().
    if 'location' in headers.dict:
        infourl = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
    else:
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    if not hasattr(infourl, 'status'):
        infourl.status = code
    return infourl
python类HTTPRedirectHandler()的实例源码
def test_cookie_redirect(self):
    # Cookies stored for the original host must not be sent along with the
    # request that follows a redirect to a different host.
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # Every request through this handler answers with a 302 to another site.
    redirecting_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # The request recorded on the mock handler must carry no Cookie header.
    self.assertTrue(not redirecting_handler.req.has_header("Cookie"))
def test_redirect_fragment(self):
    # The "#OK" fragment of the Location header must still be present in
    # the URL reported by the response after the redirect is followed.
    target = 'http://www.example.com/index.html#OK\r\n\r\n'
    mock_handler = MockHTTPHandler(302, 'Location: ' + target)
    opener = build_test_opener(
        mock_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
    )
    response = opener.open('http://www.example.com')
    # Compare against the target without its trailing CRLF terminator.
    self.assertEqual(response.geturl(), target.strip())
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow a 301 redirect and annotate the resulting response.

    Delegates to urllib2.HTTPRedirectHandler.http_error_301, then records
    the redirect status ``code`` as ``result.status`` and the final URL
    (``result.geturl()``) as ``result.newurl``.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target), so that
    the opener can fall through to its remaining error handlers instead of
    failing with AttributeError here.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, hdrs)
    if result is None:
        # Nothing was followed; there is no response object to annotate.
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def test_cookie_redirect(self):
    # Cookies stored for the original host must not be sent along with the
    # request that follows a redirect to a different host.
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")

    # Every request through this handler answers with a 302 to another site.
    redirecting_handler = MockHTTPHandler(
        302, "Location: http://www.cracker.com/\r\n\r\n")
    opener = build_test_opener(
        redirecting_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPCookieProcessor(jar),
    )
    opener.open("http://www.example.com/")

    # The request recorded on the mock handler must carry no Cookie header.
    self.assertTrue(not redirecting_handler.req.has_header("Cookie"))
def test_redirect_fragment(self):
    # The "#OK" fragment of the Location header must still be present in
    # the URL reported by the response after the redirect is followed.
    target = 'http://www.example.com/index.html#OK\r\n\r\n'
    mock_handler = MockHTTPHandler(302, 'Location: ' + target)
    opener = build_test_opener(
        mock_handler,
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
    )
    response = opener.open('http://www.example.com')
    # Compare against the target without its trailing CRLF terminator.
    self.assertEqual(response.geturl(), target.strip())
def redirect_request(self, req, *args, **kwargs):
    """Build the follow-up request for a redirect, keeping proxy/TLS state.

    The request itself is created by
    urllib2.HTTPRedirectHandler.redirect_request; this override then copies
    private attributes from ``req`` onto it. Nothing is copied unless the
    base class returned a urllib2.Request.

    Copied state:
      * tunnel/proxy settings, only when ``req`` has a ``_tunnel_host``
        attribute and the redirect target is https;
      * ``_key_file``, ``_cert_file`` and ``_ca_certs``, whenever ``req``
        has a ``_key_file`` attribute.

    Returns whatever the base class returned, possibly annotated as above.
    """
    redirected = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    if not isinstance(redirected, urllib2.Request):
        return redirected

    # This mirrors our set_proxy implementation; with only a Request in
    # hand, the alternative would be copying everything over piecemeal.
    #
    # Tunneling is not carried from an http request over to an https
    # request, because an http request does not set _tunnel_host.
    #
    # On Python < 2.6, FancyHTTPSHandler.do_open() raises an error for an
    # https urllib2.Request that goes through an http proxy, because the
    # proxy type ends up as http rather than https (FancyRequest, and
    # urllib2.Request on Python >= 2.6, set it to https). Such a request can
    # be produced here when redirecting from an http request (no
    # _tunnel_host, so the branch below is skipped) while an https proxy of
    # type http is configured, e.g. through FancyProxyHandler.
    if hasattr(req, "_tunnel_host") and redirected.get_type() == "https":
        if req._tunnel_host:
            # The original request is proxied: tunnel to the new target
            # through the same proxy (req.host).
            redirected._tunnel_host = redirected.get_host()
            redirected.set_proxy(req.host, "https")
        else:
            # Not proxied: just make sure the attribute is defined.
            redirected._tunnel_host = None
        redirected.type = "https"

    if hasattr(req, "_key_file"):
        # Carry the TLS settings along in case this or any later redirect
        # ends up on https.
        for attr in ("_key_file", "_cert_file", "_ca_certs"):
            setattr(redirected, attr, getattr(req, attr))
    return redirected
def http_error_301(self, req, fp, code, msg, headers):
    """Follow a 301 redirect and record the status code on the response.

    Delegates to urllib2.HTTPRedirectHandler.http_error_301 and stores
    ``code`` on the returned response as ``status``.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target), so that
    the opener can fall through to its remaining error handlers instead of
    failing with AttributeError here.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, headers)
    if result is not None:
        result.status = code
    return result
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a 302 redirect and record the status code on the response.

    Delegates to urllib2.HTTPRedirectHandler.http_error_302 and stores
    ``code`` on the returned response as ``status``.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target), so that
    the opener can fall through to its remaining error handlers instead of
    failing with AttributeError here.
    """
    result = urllib2.HTTPRedirectHandler.http_error_302(
        self, req, fp, code, msg, headers)
    if result is not None:
        result.status = code
    return result
def redirect_request(self, req, *args, **kwargs):
    """Build the follow-up request for a redirect, keeping proxy/TLS state.

    The request itself is created by
    urllib2.HTTPRedirectHandler.redirect_request; this override then copies
    private attributes from ``req`` onto it. Nothing is copied unless the
    base class returned a urllib2.Request.

    Copied state:
      * tunnel/proxy settings, only when ``req`` has a ``_tunnel_host``
        attribute and the redirect target is https;
      * ``_key_file``, ``_cert_file`` and ``_ca_certs``, whenever ``req``
        has a ``_key_file`` attribute.

    Returns whatever the base class returned, possibly annotated as above.
    """
    redirected = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    if not isinstance(redirected, urllib2.Request):
        return redirected

    # This mirrors our set_proxy implementation; with only a Request in
    # hand, the alternative would be copying everything over piecemeal.
    #
    # Tunneling is not carried from an http request over to an https
    # request, because an http request does not set _tunnel_host.
    #
    # On Python < 2.6, FancyHTTPSHandler.do_open() raises an error for an
    # https urllib2.Request that goes through an http proxy, because the
    # proxy type ends up as http rather than https (FancyRequest, and
    # urllib2.Request on Python >= 2.6, set it to https). Such a request can
    # be produced here when redirecting from an http request (no
    # _tunnel_host, so the branch below is skipped) while an https proxy of
    # type http is configured, e.g. through FancyProxyHandler.
    if hasattr(req, "_tunnel_host") and redirected.get_type() == "https":
        if req._tunnel_host:
            # The original request is proxied: tunnel to the new target
            # through the same proxy (req.host).
            redirected._tunnel_host = redirected.get_host()
            redirected.set_proxy(req.host, "https")
        else:
            # Not proxied: just make sure the attribute is defined.
            redirected._tunnel_host = None
        redirected.type = "https"

    if hasattr(req, "_key_file"):
        # Carry the TLS settings along in case this or any later redirect
        # ends up on https.
        for attr in ("_key_file", "_cert_file", "_ca_certs"):
            setattr(redirected, attr, getattr(req, attr))
    return redirected
def redirect_request(self, req, *args, **kwargs):
    """Build the follow-up request for a redirect, keeping proxy/TLS state.

    The request itself is created by
    urllib2.HTTPRedirectHandler.redirect_request; this override then copies
    private attributes from ``req`` onto it. Nothing is copied unless the
    base class returned a urllib2.Request.

    Copied state:
      * tunnel/proxy settings, only when ``req`` has a ``_tunnel_host``
        attribute and the redirect target is https;
      * ``_key_file``, ``_cert_file`` and ``_ca_certs``, whenever ``req``
        has a ``_key_file`` attribute.

    Returns whatever the base class returned, possibly annotated as above.
    """
    redirected = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    if not isinstance(redirected, urllib2.Request):
        return redirected

    # This mirrors our set_proxy implementation; with only a Request in
    # hand, the alternative would be copying everything over piecemeal.
    #
    # Tunneling is not carried from an http request over to an https
    # request, because an http request does not set _tunnel_host.
    #
    # On Python < 2.6, FancyHTTPSHandler.do_open() raises an error for an
    # https urllib2.Request that goes through an http proxy, because the
    # proxy type ends up as http rather than https (FancyRequest, and
    # urllib2.Request on Python >= 2.6, set it to https). Such a request can
    # be produced here when redirecting from an http request (no
    # _tunnel_host, so the branch below is skipped) while an https proxy of
    # type http is configured, e.g. through FancyProxyHandler.
    if hasattr(req, "_tunnel_host") and redirected.get_type() == "https":
        if req._tunnel_host:
            # The original request is proxied: tunnel to the new target
            # through the same proxy (req.host).
            redirected._tunnel_host = redirected.get_host()
            redirected.set_proxy(req.host, "https")
        else:
            # Not proxied: just make sure the attribute is defined.
            redirected._tunnel_host = None
        redirected.type = "https"

    if hasattr(req, "_key_file"):
        # Carry the TLS settings along in case this or any later redirect
        # ends up on https.
        for attr in ("_key_file", "_cert_file", "_ca_certs"):
            setattr(redirected, attr, getattr(req, attr))
    return redirected
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow a 301 redirect and annotate the resulting response.

    Delegates to urllib2.HTTPRedirectHandler.http_error_301, then records
    the redirect status ``code`` as ``result.status`` and the final URL
    (``result.geturl()``) as ``result.newurl``.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target), so that
    the opener can fall through to its remaining error handlers instead of
    failing with AttributeError here.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, hdrs)
    if result is None:
        # Nothing was followed; there is no response object to annotate.
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_302(self, req, fp, code, msg, headers):
    """Decline to handle a 302 response; always returns None.

    The delegation to urllib2.HTTPRedirectHandler.http_error_302 is
    commented out, so this handler never follows the redirect itself.
    """
    # return urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
    return None
def follow_redirects(link, sites=None):
    """Follow redirects for the link as long as the redirects are on the given
    sites and return the resolved link.

    Args:
        link: URL to resolve.
        sites: container of host names whose URLs may be requested and
            followed; None (the default) allows every host.

    Returns:
        ``link`` itself when its host is not allowed. Otherwise the URL of
        the response reached with HEAD requests; if opening fails, the last
        redirect target that was seen, or ``link`` when there was none.
    """
    def follow(url):
        # True when the host of `url` may be contacted.
        return sites is None or urlparse.urlparse(url).hostname in sites

    if not follow(link):
        return link

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        """Redirect handler that remembers the most recent redirect target."""

        def __init__(self):
            self.last_url = None

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                # Decline the redirect; the target stays in last_url.
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            # Keep using HEAD so no response body is requested.
            r.get_method = lambda: 'HEAD'
            return r

    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda: 'HEAD'
    try:
        with contextlib.closing(opener.open(req, timeout=1)) as site:
            return site.url
    except Exception:
        # Best effort: any request failure falls back to what is known so
        # far. Unlike a bare except, KeyboardInterrupt and SystemExit are
        # allowed to propagate.
        return redirect_handler.last_url if redirect_handler.last_url else link
def follow_redirects(link, sites=None):
    """Follow redirects for the link as long as the redirects are on the given
    sites and return the resolved link.

    Args:
        link: URL to resolve.
        sites: container of host names whose URLs may be requested and
            followed; None (the default) allows every host.

    Returns:
        ``link`` itself when its host is not allowed. Otherwise the URL of
        the response reached with HEAD requests; if opening fails, the last
        redirect target that was seen, or ``link`` when there was none.
    """
    def follow(url):
        # True when the host of `url` may be contacted.
        return sites is None or urlparse.urlparse(url).hostname in sites

    if not follow(link):
        return link

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        """Redirect handler that remembers the most recent redirect target."""

        def __init__(self):
            self.last_url = None

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                # Decline the redirect; the target stays in last_url.
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            # Keep using HEAD so no response body is requested.
            r.get_method = lambda: 'HEAD'
            return r

    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda: 'HEAD'
    try:
        with contextlib.closing(opener.open(req, timeout=1)) as site:
            return site.url
    except Exception:
        # Best effort: any request failure falls back to what is known so
        # far. Unlike a bare except, KeyboardInterrupt and SystemExit are
        # allowed to propagate.
        return redirect_handler.last_url if redirect_handler.last_url else link
def redirect_request(self, req, *args, **kwargs):
    """Build the follow-up request for a redirect, keeping proxy/TLS state.

    The request itself is created by
    urllib2.HTTPRedirectHandler.redirect_request; this override then copies
    private attributes from ``req`` onto it. Nothing is copied unless the
    base class returned a urllib2.Request.

    Copied state:
      * tunnel/proxy settings, only when ``req`` has a ``_tunnel_host``
        attribute and the redirect target is https;
      * ``_key_file``, ``_cert_file`` and ``_ca_certs``, whenever ``req``
        has a ``_key_file`` attribute.

    Returns whatever the base class returned, possibly annotated as above.
    """
    redirected = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    if not isinstance(redirected, urllib2.Request):
        return redirected

    # This mirrors our set_proxy implementation; with only a Request in
    # hand, the alternative would be copying everything over piecemeal.
    #
    # Tunneling is not carried from an http request over to an https
    # request, because an http request does not set _tunnel_host.
    #
    # On Python < 2.6, FancyHTTPSHandler.do_open() raises an error for an
    # https urllib2.Request that goes through an http proxy, because the
    # proxy type ends up as http rather than https (FancyRequest, and
    # urllib2.Request on Python >= 2.6, set it to https). Such a request can
    # be produced here when redirecting from an http request (no
    # _tunnel_host, so the branch below is skipped) while an https proxy of
    # type http is configured, e.g. through FancyProxyHandler.
    if hasattr(req, "_tunnel_host") and redirected.get_type() == "https":
        if req._tunnel_host:
            # The original request is proxied: tunnel to the new target
            # through the same proxy (req.host).
            redirected._tunnel_host = redirected.get_host()
            redirected.set_proxy(req.host, "https")
        else:
            # Not proxied: just make sure the attribute is defined.
            redirected._tunnel_host = None
        redirected.type = "https"

    if hasattr(req, "_key_file"):
        # Carry the TLS settings along in case this or any later redirect
        # ends up on https.
        for attr in ("_key_file", "_cert_file", "_ca_certs"):
            setattr(redirected, attr, getattr(req, attr))
    return redirected
def http_error_301(self, req, fp, code, msg, headers):
    """Follow a 301 redirect, echoing the response's Set-Cookie value back.

    When the redirect response has a ``Set-Cookie`` header, its raw value
    is added to ``req`` as a ``cookie`` header before the redirect is
    delegated to urllib2.HTTPRedirectHandler.http_error_301, whose result
    is returned unchanged.
    """
    cookie_offered = "Set-Cookie" in headers
    if cookie_offered:
        req.add_header('cookie', headers['Set-Cookie'])
    return urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, headers)
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a 302 redirect, echoing the response's Set-Cookie value back.

    When the redirect response has a ``Set-Cookie`` header, its raw value
    is added to ``req`` as a ``cookie`` header before the redirect is
    delegated to urllib2.HTTPRedirectHandler.http_error_302, whose result
    is returned unchanged.
    """
    cookie_offered = "Set-Cookie" in headers
    if cookie_offered:
        req.add_header('cookie', headers['Set-Cookie'])
    return urllib2.HTTPRedirectHandler.http_error_302(
        self, req, fp, code, msg, headers)
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow a 301 redirect and annotate the resulting response.

    Delegates to urllib2.HTTPRedirectHandler.http_error_301, then records
    the redirect status ``code`` as ``result.status`` and the final URL
    (``result.geturl()``) as ``result.newurl``.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target), so that
    the opener can fall through to its remaining error handlers instead of
    failing with AttributeError here.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, hdrs)
    if result is None:
        # Nothing was followed; there is no response object to annotate.
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, headers):
    """Follow a 301 redirect, record its status code and count it.

    Delegates to urllib2.HTTPRedirectHandler.http_error_301. When a
    response comes back, ``code`` is stored on it as ``status`` and the
    module-level counter ``rdctr`` is incremented by one.

    Returns the annotated response, or None when the base handler returns
    None (it does so when the response names no redirect target). In that
    case nothing is counted and the opener can fall through to its
    remaining error handlers instead of failing with AttributeError here.
    """
    global rdctr
    result = urllib2.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, headers)
    if result is None:
        # No redirect was followed, so there is nothing to annotate or count.
        return None
    result.status = code
    rdctr += 1
    return result