def follow_redirects(link, sites=None, timeout=1):
    """Resolve ``link`` by following HTTP redirects with HEAD requests.

    Redirects are followed only while the target host is in ``sites``; a
    redirect target outside ``sites`` is recorded but never requested.

    Args:
        link: URL to resolve.
        sites: Container of hostnames that may be requested, or ``None`` to
            follow every redirect.
        timeout: Socket timeout in seconds passed to the opener. Defaults to
            1, the value that was previously hard-coded.

    Returns:
        The final URL reported by the opener on success. When the request
        fails, the last redirect target seen, or ``link`` itself if no
        redirect was seen. ``link`` is also returned unchanged when its own
        host is not in ``sites``.
    """
    def follow(url):
        # Identity comparison is the correct test for the None sentinel.
        return sites is None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        """Remembers each redirect target and refuses to leave ``sites``."""

        def __init__(self):
            self.last_url = None

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                # Declining the redirect makes the opener fail; the except
                # clause below then falls back to last_url.
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            # Keep issuing HEAD requests along the redirect chain.
            r.get_method = lambda: 'HEAD'
            return r

    if not follow(link):
        return link

    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda: 'HEAD'
    try:
        with contextlib.closing(opener.open(req, timeout=timeout)) as site:
            return site.url
    except Exception:
        # Best effort: any request failure falls back to what is known so
        # far. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return redirect_handler.last_url or link
python类HTTPRedirectHandler()的实例源码
def test_invalid_redirect(self):
    """Check which redirect target schemes ``http_error_302`` accepts.

    Targets using file/imap/ldap must raise ``urllib2.HTTPError``; targets
    using http/https/ftp must be handed to the parent opener unchanged.
    """
    source_url = "http://example.com/a.html"
    target_tail = "example.com/b.html"
    accepted = ['http', 'https', 'ftp']
    rejected = ['file', 'imap', 'ldap']

    handler = urllib2.HTTPRedirectHandler()
    opener = MockOpener()
    handler.parent = opener
    request = Request(source_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in rejected:
        bad_target = scheme + '://' + target_tail
        self.assertRaises(urllib2.HTTPError, handler.http_error_302,
                          request, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": bad_target}))

    for scheme in accepted:
        good_target = scheme + '://' + target_tail
        handler.http_error_302(request, MockFile(), 302, "That's fine",
                               MockHeaders({"location": good_target}))
        # The mock opener records the request it was asked to open.
        self.assertEqual(opener.req.get_full_url(), good_target)
def test_invalid_redirect(self):
    """Check which redirect target schemes ``http_error_302`` accepts.

    Targets using file/imap/ldap must raise ``urllib2.HTTPError``; targets
    using http/https/ftp must be handed to the parent opener unchanged.
    """
    source_url = "http://example.com/a.html"
    target_tail = "example.com/b.html"
    accepted = ['http', 'https', 'ftp']
    rejected = ['file', 'imap', 'ldap']

    handler = urllib2.HTTPRedirectHandler()
    opener = MockOpener()
    handler.parent = opener
    request = Request(source_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in rejected:
        bad_target = scheme + '://' + target_tail
        self.assertRaises(urllib2.HTTPError, handler.http_error_302,
                          request, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": bad_target}))

    for scheme in accepted:
        good_target = scheme + '://' + target_tail
        handler.http_error_302(request, MockFile(), 302, "That's fine",
                               MockHeaders({"location": good_target}))
        # The mock opener records the request it was asked to open.
        self.assertEqual(opener.req.get_full_url(), good_target)
def test_invalid_redirect(self):
    """Check which redirect target schemes ``http_error_302`` accepts.

    Targets using file/imap/ldap must raise ``urllib2.HTTPError``; targets
    using http/https/ftp must be handed to the parent opener unchanged.
    """
    source_url = "http://example.com/a.html"
    target_tail = "example.com/b.html"
    accepted = ['http', 'https', 'ftp']
    rejected = ['file', 'imap', 'ldap']

    handler = urllib2.HTTPRedirectHandler()
    opener = MockOpener()
    handler.parent = opener
    request = Request(source_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in rejected:
        bad_target = scheme + '://' + target_tail
        self.assertRaises(urllib2.HTTPError, handler.http_error_302,
                          request, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": bad_target}))

    for scheme in accepted:
        good_target = scheme + '://' + target_tail
        handler.http_error_302(request, MockFile(), 302, "That's fine",
                               MockHeaders({"location": good_target}))
        # The mock opener records the request it was asked to open.
        self.assertEqual(opener.req.get_full_url(), good_target)
def getURL(url, host=BASE_URL.split('//')[1], useCookie=False, silent=False, headers=None):
    """Fetch ``url`` and return the response body, or ``False`` on failure.

    Args:
        url: Address to request.
        host: Value of the ``Host`` header used when ``headers`` is not
            given; defaults to the part of ``BASE_URL`` after ``//``.
        useCookie: Falsy for a fresh empty cookie jar, ``True`` to obtain a
            jar from ``mechanizeLogin()``, or a cookie jar object to use
            as is.
        silent: When true, the request is not logged.
        headers: Optional list of ``(name, value)`` pairs that replaces the
            default ``User-Agent``/``Host`` headers.

    Returns:
        The data read from the response. ``False`` when the cookie jar turns
        out to be a bool (NOTE(review): presumably how ``mechanizeLogin()``
        reports a failed login - confirm) or when ``urllib2.URLError`` is
        raised.
    """
    cj = cookielib.LWPCookieJar()
    if useCookie:
        cj = mechanizeLogin() if isinstance(useCookie, bool) else useCookie
        if isinstance(cj, bool):
            return False

    # The values of tvdb/tmdb and any "&token=..." parameter are removed
    # from the URL before it is logged.
    dispurl = re.sub(r'(?i)%s|%s|&token=\w+' % (tvdb, tmdb), '', url).strip()
    if not silent:
        Log('getURL: ' + dispurl)

    if not headers:
        headers = [('User-Agent', UserAgent), ('Host', host)]

    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPRedirectHandler)
        opener.addheaders = headers
        usock = opener.open(url)
        try:
            response = usock.read()
        finally:
            # Release the connection even when read() fails part-way.
            usock.close()
    except urllib2.URLError as e:
        Log('Error reason: %s' % e, xbmc.LOGERROR)
        return False
    return response
def test_invalid_redirect(self):
    """Check which redirect target schemes ``http_error_302`` accepts.

    Targets using file/imap/ldap must raise ``urllib2.HTTPError``; targets
    using http/https/ftp must be handed to the parent opener unchanged.
    """
    source_url = "http://example.com/a.html"
    target_tail = "example.com/b.html"
    accepted = ['http', 'https', 'ftp']
    rejected = ['file', 'imap', 'ldap']

    handler = urllib2.HTTPRedirectHandler()
    opener = MockOpener()
    handler.parent = opener
    request = Request(source_url)
    request.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in rejected:
        bad_target = scheme + '://' + target_tail
        self.assertRaises(urllib2.HTTPError, handler.http_error_302,
                          request, MockFile(), 302, "Security Loophole",
                          MockHeaders({"location": bad_target}))

    for scheme in accepted:
        good_target = scheme + '://' + target_tail
        handler.http_error_302(request, MockFile(), 302, "That's fine",
                               MockHeaders({"location": good_target}))
        # The mock opener records the request it was asked to open.
        self.assertEqual(opener.req.get_full_url(), good_target)
def get_file(self, url, quality):
    """POST ``url`` and ``quality`` to convertmemp3.com and keep the reply.

    A fresh cookie jar and opener are stored on ``self.cookieJar`` and
    ``self.opener``; the response body is stored on ``self.convhtml``.

    Args:
        url: Sent as the ``youtubeURL`` form field.
        quality: Sent as the ``quality`` form field.
    """
    self.cookieJar = cookielib.LWPCookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(self.cookieJar),
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0))
    self.opener.addheaders = [('User-agent', "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36")]

    forms = {"youtubeURL": url,
             'quality': quality,
             }
    data = urllib.urlencode(forms)
    # Supplying data makes urllib2 send the request as a POST.
    req = urllib2.Request('http://www.convertmemp3.com/', data)
    res = self.opener.open(req)
    try:
        self.convhtml = res.read()
    finally:
        # Close the response so the connection is not leaked.
        res.close()
def http_error_302(self, req, fp, code, msg, headers):
    """Inspect a 302 response, optionally trace it, then follow it.

    ``self.checkindicators`` is called with the requested URL and the
    response headers. In debug mode the indent level is increased and the
    redirect target is printed.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        headers: Headers of the redirect response.

    Returns:
        The result of ``urllib2.HTTPRedirectHandler.http_error_302``.
    """
    self.checkindicators(url=req.get_full_url(), headers=headers)
    if self.debug:
        self.indent_n += 1
        # .get() keeps the debug trace from raising KeyError when the
        # response carries no Location header.
        print("%s[REDIRECT] = [../%s]" % (self.indent(), headers.get('Location')))
    return urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
def http_error_301(self, req, fp, code, msg, headers):
    """Handle a 301, copying the response's Set-Cookie onto the request.

    When the response has a ``Set-Cookie`` header, its value is added to
    ``req`` as a ``cookie`` header before the stock 301 handling runs.
    Returns whatever the stock handler returns.
    """
    if "Set-Cookie" in headers:
        cookie_value = headers['Set-Cookie']
        req.add_header('cookie', cookie_value)
    base = urllib2.HTTPRedirectHandler
    return base.http_error_301(self, req, fp, code, msg, headers)
def http_error_302(self, req, fp, code, msg, headers):
    """Handle a 302, copying the response's Set-Cookie onto the request.

    When the response has a ``Set-Cookie`` header, its value is added to
    ``req`` as a ``cookie`` header before the stock 302 handling runs.
    Returns whatever the stock handler returns.
    """
    if "Set-Cookie" in headers:
        cookie_value = headers['Set-Cookie']
        req.add_header('cookie', cookie_value)
    base = urllib2.HTTPRedirectHandler
    return base.http_error_302(self, req, fp, code, msg, headers)
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a 302 and record the redirect status code on the response.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        headers: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_302`` returns. A
        response object gains a ``status`` attribute set to ``code``;
        ``None`` is passed through untouched.
    """
    result = urllib2.HTTPRedirectHandler.http_error_302(
        self, req, fp, code, msg, headers)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); setting an attribute on None would
    # raise AttributeError.
    if result is not None:
        result.status = code
    return result
def http_error_301(self, req, fp, code, msg, headers):
    """Handle a 301, copying the response's Set-Cookie onto the request.

    When the response has a ``Set-Cookie`` header, its value is added to
    ``req`` as a ``cookie`` header before the stock 301 handling runs.
    Returns whatever the stock handler returns.
    """
    if "Set-Cookie" in headers:
        cookie_value = headers['Set-Cookie']
        req.add_header('cookie', cookie_value)
    base = urllib2.HTTPRedirectHandler
    return base.http_error_301(self, req, fp, code, msg, headers)
def http_error_302(self, req, fp, code, msg, headers):
    """Handle a 302, copying the response's Set-Cookie onto the request.

    When the response has a ``Set-Cookie`` header, its value is added to
    ``req`` as a ``cookie`` header before the stock 302 handling runs.
    Returns whatever the stock handler returns.
    """
    if "Set-Cookie" in headers:
        cookie_value = headers['Set-Cookie']
        req.add_header('cookie', cookie_value)
    base = urllib2.HTTPRedirectHandler
    return base.http_error_302(self, req, fp, code, msg, headers)
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def test_cookie_redirect(self):
    """A cookie stored for example.com is not sent to the redirect target."""
    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")
    mock_http = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
    default_errors = urllib2.HTTPDefaultErrorHandler()
    redirects = urllib2.HTTPRedirectHandler()
    cookies = urllib2.HTTPCookieProcessor(jar)
    opener = build_test_opener(mock_http, default_errors, redirects, cookies)
    opener.open("http://www.example.com/")
    # mock_http.req is the request the mock handler recorded.
    self.assertTrue(not mock_http.req.has_header("Cookie"))
def test_redirect_fragment(self):
    """The URL reported after a redirect keeps its ``#OK`` fragment."""
    target = 'http://www.example.com/index.html#OK\r\n\r\n'
    mock_http = MockHTTPHandler(302, 'Location: ' + target)
    default_errors = urllib2.HTTPDefaultErrorHandler()
    redirects = urllib2.HTTPRedirectHandler()
    opener = build_test_opener(mock_http, default_errors, redirects)
    response = opener.open('http://www.example.com')
    # The header text ends with blank-line terminators; strip them before
    # comparing with the reported URL.
    self.assertEqual(response.geturl(), target.strip())
def test_cookie_redirect(self):
    """A cookie stored for example.com is not sent to the redirect target."""
    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")
    mock_http = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
    default_errors = urllib2.HTTPDefaultErrorHandler()
    redirects = urllib2.HTTPRedirectHandler()
    cookies = urllib2.HTTPCookieProcessor(jar)
    opener = build_test_opener(mock_http, default_errors, redirects, cookies)
    opener.open("http://www.example.com/")
    # mock_http.req is the request the mock handler recorded.
    self.assertTrue(not mock_http.req.has_header("Cookie"))
def test_redirect_fragment(self):
    """The URL reported after a redirect keeps its ``#OK`` fragment."""
    target = 'http://www.example.com/index.html#OK\r\n\r\n'
    mock_http = MockHTTPHandler(302, 'Location: ' + target)
    default_errors = urllib2.HTTPDefaultErrorHandler()
    redirects = urllib2.HTTPRedirectHandler()
    opener = build_test_opener(mock_http, default_errors, redirects)
    response = opener.open('http://www.example.com')
    # The header text ends with blank-line terminators; strip them before
    # comparing with the reported URL.
    self.assertEqual(response.geturl(), target.strip())
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a 302 when it names a location, else wrap the response as is.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        headers: Response headers; ``headers.dict`` is consulted for a
            ``location`` key.

    Returns:
        The stock handler's result when a ``location`` header exists,
        otherwise an ``addinfourl`` built from ``fp``, ``headers`` and the
        request URL. ``status`` is set to ``code`` when the returned object
        does not already have that attribute.
    """
    # ``in`` replaces the deprecated dict.has_key().
    if 'location' in headers.dict:
        infourl = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
    else:
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    # Guard against None (handler declined the redirect) before tagging.
    if infourl is not None and not hasattr(infourl, 'status'):
        infourl.status = code
    return infourl
def http_error_301(self, req, fp, code, msg, headers):
    """Follow a 301 when it names a location, else wrap the response as is.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        headers: Response headers; ``headers.dict`` is consulted for a
            ``location`` key.

    Returns:
        The stock handler's result when a ``location`` header exists,
        otherwise an ``addinfourl`` built from ``fp``, ``headers`` and the
        request URL. ``status`` is set to ``code`` when the returned object
        does not already have that attribute.
    """
    # ``in`` replaces the deprecated dict.has_key().
    if 'location' in headers.dict:
        infourl = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
    else:
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    # Guard against None (handler declined the redirect) before tagging.
    if infourl is not None and not hasattr(infourl, 'status'):
        infourl.status = code
    return infourl
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
def http_error_301(self, req, fp, code, msg, hdrs):
    """Follow the redirect and tag the response with the redirect details.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        hdrs: Headers of the redirect response.

    Returns:
        Whatever ``urllib2.HTTPRedirectHandler.http_error_301`` returns. A
        response object gains ``status`` (the redirect ``code``) and
        ``newurl`` (its ``geturl()`` value); ``None`` is passed through.
    """
    result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp,
                                                        code, msg, hdrs)
    # The base handler returns None when it declines the redirect (for
    # example, no Location header); pass that on instead of failing with
    # AttributeError.
    if result is None:
        return None
    result.status = code
    result.newurl = result.geturl()
    return result
# The default implementations in urllib2.HTTPRedirectHandler
# are identical, so hardcoding a http_error_301 call above
# won't affect anything
SignAndSearch.py 文件源码
项目:relational-social-media-search-engine
作者: indervirbanipal
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def __init__(self, login, password):
    """Store the credentials and build a cookie-aware, proxied URL opener.

    Cookies are kept in a ``MozillaCookieJar`` bound to ``cookie_filename``
    and are loaded from that file when it exists. The opener is stored on
    ``self.opener`` with a fixed ``User-agent`` header.
    """
    self.login = login
    self.password = password

    # Simulate browser with cookies enabled
    self.cj = cookielib.MozillaCookieJar(cookie_filename)

    # Creating settings for the proxy: HTTP traffic is routed through the
    # host below.
    http_proxy = {'http': 'notional-sign-110911.appspot.com'}
    proxy_handler = urllib2.ProxyHandler(http_proxy)

    cookie_file_exists = os.access(cookie_filename, os.F_OK)
    if cookie_file_exists:
        self.cj.load()

    handlers = [
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        proxy_handler,
        urllib2.HTTPCookieProcessor(self.cj),
    ]
    self.opener = urllib2.build_opener(*handlers)

    user_agent = ('Mozilla/4.0 (compatible; MSIE 6.0; '
                  'Windows NT 5.2; .NET CLR 1.1.4322)')
    self.opener.addheaders = [('User-agent', user_agent)]
SignAndSearch.py 文件源码
项目:relational-social-media-search-engine
作者: indervirbanipal
项目源码
文件源码
阅读 32
收藏 0
点赞 0
评论 0
def performFullSearch(self, searchParams, dbHost, dbPort, dbName):
    """Perform a search and save the gathered information into the DB.

    Rebuilds the cookie jar and opener, checks the login, runs the search
    and stores the resulting record.

    Args:
        searchParams: Mapping read for ``'firstName'`` and ``'email'`` and
            passed to ``formSearchURL``.
        dbHost: Passed to ``connect2DB``.
        dbPort: Passed to ``connect2DB``.
        dbName: Not used by this method.

    Returns:
        ``'Success'`` when every step completes. Otherwise the first
        argument of the exception that was caught, or its string form when
        it carries no arguments.
    """
    print("inside Perform Search ... ")
    try:
        # Simulate browser with cookies enabled
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()
        self.opener = urllib2.build_opener(
            urllib2.HTTPRedirectHandler(),
            urllib2.HTTPHandler(debuglevel=0),
            urllib2.HTTPSHandler(debuglevel=0),
            urllib2.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                            'Windows NT 5.2; .NET CLR 1.1.4322)'))
        ]
        self.checkLogin(url1)

        fName = searchParams['firstName']
        mailId = searchParams['email']
        if fName == 'EMPTY' or mailId == 'EMPTY':
            raise Exception('Info: Search has to be performed from Search page only, Please try again', 'Info')

        fSrchURL = self.formSearchURL(searchParams)
        linkedJSON = self.loadSearch(fSrchURL, fName)
        recordJSON = self.formTrimmedJSON(linkedJSON)
        dbRecord = self.formDBRecord(recordJSON, mailId)
        client = self.connect2DB(dbHost, dbPort)
        print("Client details : " + str(client))
        self.store2DB(dbRecord, mailId, client)
        return 'Success'
    except Exception as e:
        # Exceptions do not always carry exactly two args; unpacking them
        # would raise ValueError here and hide the real error.
        return e.args[0] if e.args else str(e)
SignAndSearch.py 文件源码
项目:relational-social-media-search-engine
作者: indervirbanipal
项目源码
文件源码
阅读 34
收藏 0
点赞 0
评论 0
def filterResult(self, filterParams, dbHost, dbPort, dbName):
    """Check the login and print the stored ``Person`` records.

    Rebuilds the cookie jar and opener, checks the login, then prints
    ``Person.objects.all()``.

    Args:
        filterParams: Not used by this method yet.
        dbHost: Not used by this method yet.
        dbPort: Not used by this method yet.
        dbName: Not used by this method yet.

    Returns:
        ``'Success'`` when every step completes. Otherwise the first
        argument of the exception that was caught, or its string form when
        it carries no arguments.
    """
    print("Inside Filter Result view ...")
    try:
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()
        self.opener = urllib2.build_opener(
            urllib2.HTTPRedirectHandler(),
            urllib2.HTTPHandler(debuglevel=0),
            urllib2.HTTPSHandler(debuglevel=0),
            urllib2.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                            'Windows NT 5.2; .NET CLR 1.1.4322)'))
        ]
        self.checkLogin(url1)
        ## start here ##
        # str() is required: concatenating the query result to a string
        # directly raises TypeError.
        print(" Data So Far : \n" + str(Person.objects.all()))
        return 'Success'
    except Exception as e:
        # Exceptions do not always carry exactly two args; unpacking them
        # would raise ValueError here and hide the real error.
        return e.args[0] if e.args else str(e)
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a 302 when it names a location, else wrap the response as is.

    Args:
        req: The request that received the redirect response.
        fp: File-like object holding the redirect response body.
        code: HTTP status code of the redirect response.
        msg: Reason phrase accompanying ``code``.
        headers: Response headers; ``headers.dict`` is consulted for a
            ``location`` key.

    Returns:
        The stock handler's result when a ``location`` header exists,
        otherwise an ``addinfourl`` built from ``fp``, ``headers`` and the
        request URL. ``status`` is set to ``code`` when the returned object
        does not already have that attribute.
    """
    # ``in`` replaces the deprecated dict.has_key().
    if 'location' in headers.dict:
        infourl = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
    else:
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    # Guard against None (handler declined the redirect) before tagging.
    if infourl is not None and not hasattr(infourl, 'status'):
        infourl.status = code
    return infourl