async def fetch(retry=0):
    # Pick a random proxy and a random user agent for this request.
    proxy = 'http://{}'.format(Proxy.get_random()['address'])
    headers = {'user-agent': get_user_agent()}
    conn = aiohttp.ProxyConnector(proxy=proxy)
    url = 'http://httpbin.org/ip'
    try:
        with aiohttp.ClientSession(connector=conn) as session:
            with aiohttp.Timeout(TIMEOUT):
                async with session.get(url, headers=headers) as resp:
                    return await resp.json()
    except (ProxyConnectionError, TimeoutError):
        # The proxy failed: drop it from the pool and retry with another one.
        try:
            p = Proxy.objects.get(address=proxy)
            if p:
                p.delete()
        except DoesNotExist:
            pass
        retry += 1
        if retry > 5:
            raise TimeoutError()
        await asyncio.sleep(1)
        return await fetch(retry=retry)
async def fetch(url, retry=0):
    proxy = 'http://{}'.format(Proxy.get_random()['address'])
    headers = {'user-agent': get_user_agent()}
    conn = aiohttp.ProxyConnector(proxy=proxy)
    js_url = gen_js_url(url)
    try:
        with aiohttp.ClientSession(connector=conn) as session:
            with aiohttp.Timeout(TIMEOUT):
                async with session.get(url, headers=headers) as resp:
                    html_text = await resp.text()
                async with session.get(js_url, headers=headers) as resp:
                    js_data = await resp.json()
    except:
        # Any failure (bad proxy, timeout, parse error): back off and retry.
        retry += 1
        if retry > 5:
            raise CrawlerError()
        await asyncio.sleep(1)
        return await fetch(url, retry=retry)
    return html_text, js_data
async def _download(self, request):
    log.debug("Http Request: %s %s" % (request.method, request.url))
    with aiohttp.ClientSession(connector=None if (request.proxy is None) else aiohttp.ProxyConnector(proxy=request.proxy),
                               cookies=request.cookies) as session:
        with aiohttp.Timeout(self._timeout):
            async with session.request(request.method,
                                       request.url,
                                       headers=request.headers,
                                       data=request.body) as resp:
                body = await resp.read()
                response = HttpResponse(resp.url,
                                        resp.status,
                                        headers=resp.headers,
                                        body=body,
                                        cookies=resp.cookies)
    return response
def create_session(self, loop):
    conn = None
    if self.proxy and self.proxy_user:
        conn = aiohttp.ProxyConnector(
            loop=loop,
            limit=self.parallel,
            proxy=self.proxy,
            proxy_auth=aiohttp.BasicAuth(self.proxy_user, self.proxy_password)
        )
    elif self.proxy:
        conn = aiohttp.ProxyConnector(loop=loop, limit=self.parallel, proxy=self.proxy)
    else:
        conn = aiohttp.TCPConnector(loop=loop, limit=self.parallel)
    session = aiohttp.ClientSession(connector=conn)
    return session
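A minimal driving sketch for the method above, assuming a hypothetical downloader object that carries the proxy, proxy_user, proxy_password, and parallel attributes it reads; the caller is responsible for closing the returned session (in the old aiohttp releases that still ship ProxyConnector, close() is synchronous).

import asyncio

async def crawl(downloader, loop):
    # create_session() chooses ProxyConnector vs. TCPConnector from the proxy settings.
    session = downloader.create_session(loop)
    try:
        async with session.get('http://httpbin.org/ip') as resp:
            return await resp.text()
    finally:
        session.close()

# loop = asyncio.get_event_loop()
# loop.run_until_complete(crawl(downloader, loop))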
async def _verify_proxy(self, proxy):
    addr = proxy['protocol'] + '://' + proxy['ip'] + ':' + proxy['port']
    conn = aiohttp.ProxyConnector(proxy=addr)
    try:
        session = aiohttp.ClientSession(connector=conn)
        with aiohttp.Timeout(10):
            # Close the response explicitly, otherwise aiohttp warns about
            # unclosed connections and unclosed responses.
            async with session.get(self.test_url[random.randrange(len(self.test_url))]) as response:
                try:
                    assert response.status == 200
                    redis_sadd(self.redis, self.proxy_key, proxy)
                except:
                    pass
    except:  # ProxyConnectionError, HttpProxyError, etc.
        pass
    finally:
        session.close()  # close the session even on timeout
async def getPage(url, res_list):
    print(url)
    headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    # conn = aiohttp.ProxyConnector(proxy="http://127.0.0.1:8087")
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            assert resp.status == 200
            res_list.append(await resp.text())
async def fetch(url, proxy=None):
    conn = aiohttp.ProxyConnector(proxy=proxy)
    headers = {'user-agent': get_user_agent()}
    with aiohttp.ClientSession(connector=conn) as session:
        with aiohttp.Timeout(TIMEOUT):
            async with session.get('http://python.org', headers=headers) as resp:
                return await resp.json()
async def _download(self, proxy):
    with aiohttp.ClientSession(connector=aiohttp.ProxyConnector(proxy=proxy)) as session:
        with aiohttp.Timeout(20):
            async with session.request("GET",
                                       "http://m.baidu.com") as resp:
                body = await resp.read()
def replication_worker(node: str):
    if not running:
        return
    queue = get_queue_for_node(node)
    logger.debug('[-> {node}] Starting...'.format(
        node=node,
    ))
    INTERVALS[node] = MIN_INTERVAL
    conn = None
    if config.args.proxy:
        conn = aiohttp.ProxyConnector(proxy=config.args.proxy)
    with aiohttp.ClientSession(connector=conn) as session:
        try:
            while True:
                qitem = yield from queue_getter(queue)
                if qitem:
                    itemid, obj = qitem
                else:
                    continue
                try:
                    yield from perform_operation(session, node, obj)
                    INTERVALS[node] = MIN_INTERVAL
                    logger.debug('[-> {node}] Operation replicated successfully'.format(node=node))
                except (IOError, aiohttp.errors.ClientError):
                    logger.exception('[-> {node}] Error during replication'.format(node=node))
                    yield from asyncio.sleep(INTERVALS[node])
                    # Slow down repetitions
                    INTERVALS[node] = min(INTERVALS[node] * 2, MAX_INTERVAL)
                else:
                    queue.delete(itemid)
        except asyncio.CancelledError:
            logger.debug('[-> {node}] Cancelled.'.format(
                node=node,
            ))
    logger.debug('[-> {node}] Goodbye.'.format(
        node=node,
    ))
def __call__(self, session=None, proxy=None):
    """
    :param session: An `aiohttp.ClientSession` object
    :param proxy: If session is unset, an http proxy address. See
        the documentation on `aiohttp.ProxyConnector`
    """
    if session is None:
        conn = None
        if proxy is not None:
            conn = aiohttp.ProxyConnector(proxy=proxy)
        session = aiohttp.ClientSession(connector=conn)
    return SessionManager(self.spec, session)
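A hedged usage sketch of the factory call above; the factory name and the example() helper are assumptions for illustration, the only requirement being an object that defines the __call__ shown here.

import aiohttp

def example(factory):
    # Let __call__ build a ProxyConnector-backed session from a proxy address...
    via_proxy = factory(proxy='http://127.0.0.1:8087')
    # ...or hand it a session that the caller creates and manages itself.
    own_session = aiohttp.ClientSession()
    direct = factory(session=own_session)
    return via_proxy, direct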