def proxyurllib(proxy_url='http://10.112.5.173:49908',
                target_url='http://www.google.com'):
    """Fetch a page through an HTTP proxy using ``urllib.request``.

    Builds an opener around a ``ProxyHandler``, installs it as the
    process-wide default opener, downloads ``target_url`` and prints the raw
    response body followed by the proxies reported by
    ``request.getproxies()``.

    Args:
        proxy_url: URL of the HTTP proxy to route the request through.
        target_url: Address of the page to download.
    """
    print(COLOR_GREEN+'-'*30+COLOR_NONE)
    handler = request.ProxyHandler({'http': proxy_url})
    # Optional proxy authentication (kept disabled, as in the original):
    #   proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
    #   proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
    opener = request.build_opener(handler)
    # install_opener() makes this opener the global default, so every later
    # request.urlopen() call in the process goes through the proxy.
    request.install_opener(opener)
    # Close the response deterministically instead of leaking the socket.
    with request.urlopen(target_url) as response:
        print(response.read())
    print("?????",request.getproxies())
#proxyurllib()
# FIXME: robots.txt (remainder of this note was lost to a text-encoding error)
# Example source code for Python's getproxies()
def make_soup(url, timeout=30):  # pragma: no cover
    """Download *url* and parse the response body as an HTML document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(
        url,
        headers={'User-agent': 'UIP'},
        # gets system proxy (if it is currently using one)
        proxies=getproxies(),
        # requests never times out on its own; without this a stalled
        # server would block the caller forever.
        timeout=timeout)
    return BeautifulSoup(response.content, "html.parser")
def make_json(url, timeout=30):  # pragma: no cover
    """Download *url* and decode the response body as JSON.

    Args:
        url: Address of the JSON resource to fetch.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The Python object produced by ``json.loads`` from the response text.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(
        url,
        headers={'User-agent': 'UIP'},
        # gets system proxy (if it is currently using one)
        proxies=getproxies(),
        # requests never times out on its own; without this a stalled
        # server would block the caller forever.
        timeout=timeout)
    return json.loads(response.text)
def get_proxies():
    """Return the system HTTP(S) proxies, each with an explicit URL scheme.

    Reads the mapping returned by ``getproxies()``, keeps only the entries
    whose key starts with ``'http'`` and prefixes ``http://`` to any value
    that does not already contain a scheme.

    Returns:
        dict: Proxy key (e.g. ``'http'``, ``'https'``) mapped to a proxy URL.
    """
    filtered_proxies = {}
    for key, value in getproxies().items():
        if not key.startswith('http'):
            continue
        # Test for the scheme separator rather than a leading "http": a bare
        # host such as "httpproxy.corp:8080" still needs a scheme, while
        # "socks5://host:1080" already has one and must not be re-prefixed.
        if '://' not in value:
            value = 'http://%s' % value
        filtered_proxies[key] = value
    return filtered_proxies