def get(self, host, params=()):
    # type: (object, object) -> object
    """Scan the URLs derived from ``host`` and return the collected results.

    The target URL is split into scheme and network location, a urllib3
    connection pool is opened for that location, and ``self.request`` is
    dispatched over every entry of ``self.urls`` through a thread pool
    created with ``self.threads``.

    :param host: target URL including its scheme (``http`` or ``https``)
    :param params: options forwarded unchanged to ``self.__parse_params``
    :return: dict with ``'count'`` (``self.counter``, updated with the
        ``'total'`` number of URLs and the number of ``'pools'`` workers)
        and ``'result'`` (``self.result``)
    :raises SystemExit: on an unsupported scheme (exit status 1) or when
        the scan is interrupted from the keyboard
    """
    self.__is_server_online(host)
    self.__disable_verbose()
    self.__parse_params(params)

    # Parse once; from here on ``host`` holds the network location only.
    target = urlparse(host)
    scheme, host = target.scheme, target.netloc
    self.DEFAULT_HTTP_PROTOCOL = scheme + "://"
    self.urls = self.__get_urls(host)
    self.HEADER['user-agent'] = self.reader.get_random_user_agent()
    log.info("user-agent : " + self.HEADER['user-agent'])
    log.info('Thread num : ' + str(self.threads))

    # Stays None when setup fails before the thread pool exists, so the
    # bookkeeping after the try block never reads an unbound name.
    pool = None
    try:
        httplib.HTTPConnection.debuglevel = self.debug
        if hasattr(urllib3, 'disable_warnings'):
            urllib3.disable_warnings()

        if scheme == "http":
            pool_class, default_port = urllib3.HTTPConnectionPool, 80
        elif scheme == "https":
            pool_class, default_port = urllib3.HTTPSConnectionPool, 443
        else:
            log.critical("not support http protocl, Exit now ")
            sys.exit(1)

        # An explicit "host:port" overrides the scheme's default port.
        # NOTE: a plain split on ':' does not handle IPv6 literals or
        # "user:pass@" prefixes in the network location.
        netloc_parts = host.split(':')
        if len(netloc_parts) == 1:
            port = default_port
        else:
            port = int(netloc_parts[1])
        self.http = pool_class(netloc_parts[0], port=port,
                               block=True, maxsize=10)

        pool = threadpool.ThreadPool(self.threads)
        for req in threadpool.makeRequests(self.request, self.urls):
            pool.putRequest(req)
        # NOTE(review): the pasted source had lost its indentation; this
        # one-second pause is assumed to follow the queuing loop rather
        # than run once per request -- confirm against the upstream file.
        time.sleep(1)
        pool.wait()
    except AttributeError as e:
        # Best effort: report the problem and still return what was
        # gathered so far.
        log.critical(str(e))
    except KeyboardInterrupt:
        log.warning('Session canceled')
        sys.exit()

    self.counter['total'] = len(self.urls)
    self.counter['pools'] = len(pool.workers) if pool is not None else 0
    return {'count': self.counter, 'result': self.result}
评论列表
文章目录