def catch_request(request):
    """Helper function to catch common exceptions encountered when
    establishing a connection with an HTTP/HTTPS request
    """
    try:
        uh = urlopen(request)
        return uh
    except (HTTPError, URLError, socket.error):
        return False
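A minimal usage sketch for catch_request(); it supplies the names the helper relies on (urlopen, HTTPError, URLError, socket) via Python 3 imports, and the URL is an example only:

import socket
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError

req = Request('https://www.speedtest.net/speedtest-config.php')  # example URL
uh = catch_request(req)
if uh is False:
    print('Could not reach the server')
else:
    print(uh.read(64))  # first bytes of the response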
def getBestServer(servers):
    """Perform a speedtest.net latency request to determine which
    speedtest.net server has the lowest latency
    """
    results = {}
    for server in servers:
        cum = []
        url = '%s/latency.txt' % os.path.dirname(server['url'])
        urlparts = urlparse(url)
        for i in range(0, 3):
            try:
                if urlparts[0] == 'https':
                    h = HTTPSConnection(urlparts[1])
                else:
                    h = HTTPConnection(urlparts[1])
                headers = {'User-Agent': user_agent}
                start = timeit.default_timer()
                h.request("GET", urlparts[2], headers=headers)
                r = h.getresponse()
                total = (timeit.default_timer() - start)
            except (HTTPError, URLError, socket.error):
                cum.append(3600)
                continue
            text = r.read(9)
            if int(r.status) == 200 and text == 'test=test'.encode():
                cum.append(total)
            else:
                cum.append(3600)
            h.close()
        avg = round((sum(cum) / 6) * 1000, 3)
        results[avg] = server
    fastest = sorted(results.keys())[0]
    best = results[fastest]
    best['latency'] = fastest
    return best
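A minimal usage sketch for getBestServer(), assuming Python 3; the imports mirror what the function itself references, while user_agent and the server URLs are placeholders for illustration (only the 'url' key of each server dict is read by the code above):

import os
import socket
import timeit
from http.client import HTTPConnection, HTTPSConnection
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse

user_agent = 'Mozilla/5.0 (example)'  # placeholder; real scripts build this string elsewhere
servers = [
    {'url': 'http://speedtest1.example.net/speedtest/upload.php'},  # hypothetical hosts
    {'url': 'http://speedtest2.example.net/speedtest/upload.php'},
]
best = getBestServer(servers)
print('Lowest latency: %s ms via %s' % (best['latency'], best['url']))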
def lookup(keyword):  # word lookup function
    # build url to lookup
    url = base_url + keyword
    req = Request(url)  # grab web page
    try:
        grab_page = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print(keyword, e.reason)
            undef_unknowns = open("unknown_words_notfound.txt", "a")
            undef_unknowns.write(keyword + "\n")  # log unfound word in file
            undef_unknowns.close()
        elif hasattr(e, 'code'):
            print('The server couldn\'t fulfill the request.')
            print('Error code: ', e.code)
    else:
        web_page = grab_page.readlines()  # read web page lines
        for line in web_page:
            line = line.decode('utf-8')
            if '<meta name="description"' in line:  # find required line
                splitline = line.split('"')
                for entry in splitline:  # extract bits we want
                    if 'definition,' in entry:
                        write_line = keyword + ": " + ''.join(entry.split('definition, ')[1:])
                        print(write_line)
                        write_line += "\n"
                        def_unknowns = open("unknown_words_defs.txt", "a")
                        def_unknowns.write(write_line)  # write word + def'n to file
                        def_unknowns.close()
def get_category(url):
    try:
        html = request.urlopen("http://b.hatena.ne.jp/entry/{}".format(url))
        soup = BeautifulSoup(html, "lxml")
        return soup.find("html").get("data-category-name")
    except request.HTTPError as e:
        print(e.reason)
    except request.URLError as e:
        print(e.reason)
def is_hatenatop(url):
    try:
        html = request.urlopen("http://hatenablog.com/")
    except request.HTTPError as e:
        print(e.reason)
        return False  # bail out; html is not available after a failed request
    except request.URLError as e:
        print(e.reason)
        return False
    soup = BeautifulSoup(html, "lxml")
    a = soup.find("a", href=url)
    if a is None:
        return False
    return url == a.get("href")
def check_connectivity(reference):
    try:
        urlopen(reference, timeout=1)
        return True
    except URLError:
        return False
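A minimal usage sketch, assuming Python 3 imports for the names the helper uses; the probe URL is an example only:

from urllib.request import urlopen
from urllib.error import URLError

if check_connectivity('http://example.com'):
    print('network reachable')
else:
    print('network unreachable')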
def request(url, data={}, headers={}, timeout=10):
    """ Returns a file-like object to the given URL.
    """
    if cookies is not None:
        f = urllib2.HTTPCookieProcessor(cookies)
        f = urllib2.build_opener(f)
    else:
        f = urllib2.build_opener()
    try:
        f = f.open(Request(url, urlencode(data) if data else None, headers), timeout=timeout)
    except URLError as e:
        status = getattr(e, 'code', None)  # HTTPError
        if status == 401:
            raise Forbidden
        if status == 403:
            raise Forbidden
        if status == 404:
            raise NotFound
        if status == 420:
            raise TooManyRequests
        if status == 429:
            raise TooManyRequests
        raise e
    except socket.error as e:
        if 'timed out' in repr(e.args):
            raise Timeout
        else:
            raise e
    log.info(url)
    return f
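A minimal usage sketch for this helper; it assumes the module-level cookies and log objects and the Forbidden/NotFound/TooManyRequests/Timeout exception classes referenced above are defined elsewhere in the same module, and the URL and header values are examples only. Passing a data dict would urlencode it and send it as the request body.

try:
    resp = request('http://example.com/api',               # example URL
                   headers={'User-Agent': 'example/1.0'},
                   timeout=5)
    print(resp.read(200))
except NotFound:
    print('resource does not exist (404)')
except TooManyRequests:
    print('rate limited (420/429); back off and retry later')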
def catch_request(request):
    """Helper function to catch common exceptions encountered when
    establishing a connection with an HTTP/HTTPS request
    """
    try:
        uh = urlopen(request)
        return uh, False
    except (HTTPError, URLError, socket.error):
        e = sys.exc_info()[1]
        return None, e
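Unlike the earlier variant, this one returns a (handle, error) pair so callers can inspect the underlying exception; a minimal usage sketch (example URL, Python 3 imports assumed):

import socket
import sys
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError

req = Request('https://www.speedtest.net/speedtest-config.php')  # example URL
uh, e = catch_request(req)
if e:
    print('Request failed: %s' % e)
else:
    print(uh.read(64))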
def _internet_on(address):
    """
    Check whether the internet is reachable by making a quick HTTP request to a set address.
    :param address: the IP or address to hit
    :return: a boolean - True if it can be reached, False if not.
    """
    try:
        urllib2.urlopen(address, timeout=1)
        return True
    except urllib2.URLError as err:
        return False
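A minimal usage sketch (Python 2, matching the urllib2 calls above); the probe address is an example only:

import urllib2

if _internet_on('http://www.example.com'):
    print('online')
else:
    print('offline')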
def _query(self, path, before=None, after=None):
    res = []
    url = '%s/lookup/%s' % (self.server, path)
    params = {}
    if self.limit:
        params['limit'] = self.limit
    if before and after:
        params['time_first_after'] = after
        params['time_last_before'] = before
    else:
        if before:
            params['time_first_before'] = before
        if after:
            params['time_last_after'] = after
    if params:
        url += '?{0}'.format(urlencode(params))
    req = Request(url)
    req.add_header('Accept', 'application/json')
    req.add_header('X-Api-Key', self.apikey)
    proxy_args = {}
    if self.http_proxy:
        proxy_args['http'] = self.http_proxy
    if self.https_proxy:
        proxy_args['https'] = self.https_proxy
    proxy_handler = ProxyHandler(proxy_args)
    opener = build_opener(proxy_handler)
    try:
        http = opener.open(req)
        while True:
            line = http.readline()
            if not line:
                break
            yield json.loads(line.decode('ascii'))
    except (HTTPError, URLError) as e:
        raise QueryError(str(e), sys.exc_traceback)
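Because the method yields one decoded JSON record per response line, callers iterate over it; a minimal sketch of a hypothetical caller (the client object, its server/apikey/limit/proxy attributes, and the lookup path are illustrative assumptions):

# Hypothetical owning object: _query expects self.server, self.apikey,
# self.limit, self.http_proxy and self.https_proxy to be set on it.
for record in client._query('rrset/name/www.example.com', after=1420070400):
    print(record)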
def connect(self):
    """ Attempt to connect to the bridge and return true if successful """
    self.logger.info('Connect: %s', self.url)
    try:
        urlopen(self.url, timeout=1)
        return True
    except URLError:
        return False
Source file: torrentclient.py (project: Seedbox-Statistics-For-InfluxDB, author: barrycarey)
def _make_request(self, req, genmsg='', fail_msg='', abort_on_fail=None):
    """
    Make the web request. Doing it here avoids a lot of duplicate exception handling.
    :param genmsg: Message we can print to console or logs so we know about the request
    :param fail_msg: Message we can print to console or logs on failure
    :param abort_on_fail: Exit on failed request
    :return: Response
    """
    if genmsg:
        self.send_log(genmsg, 'info')
    try:
        res = urlopen(req)
    except URLError as e:
        if fail_msg:
            msg = fail_msg
        else:
            msg = 'Failed to make request'
        if abort_on_fail:
            self.send_log(msg, 'critical')
            self.send_log('Aborting', 'critical')
            sys.exit(1)
        else:
            self.send_log(msg, 'error')
            return None
    return res
def get_active_plugins(self):
    """
    Return all active plugins
    :return:
    """
    req = self._create_request(method='core.get_enabled_plugins', params=[])
    try:
        self._check_session()  # Make sure we still have an active session
        res = urlopen(req)
    except URLError as e:
        msg = 'Failed to get list of plugins. HTTP Error'
        self.send_log(msg, 'error')
        print(msg)
        print(e)
        self.active_plugins = []
        return
    output = self._process_response(res)
    if output['error']:
        msg = 'Problem getting plugin list from {}. Error: {}'.format(self.torrent_client, output['error'])
        print(msg)
        self.send_log(msg, 'error')
        self.active_plugins = []
        return
    self.active_plugins = output['result']
def request(self, host, handler, request_body, verbose=0):
    """Send XMLRPC request"""
    uri = '{scheme}://{host}{handler}'.format(scheme=self._scheme,
                                              host=host, handler=handler)
    if self._passmgr:
        self._passmgr.add_password(None, uri, self._username,
                                   self._password)
    if self.verbose:
        _LOGGER.debug("FabricTransport: {0}".format(uri))
    opener = urllib2.build_opener(*self._handlers)
    headers = {
        'Content-Type': 'text/xml',
        'User-Agent': self.user_agent,
    }
    req = urllib2.Request(uri, request_body, headers=headers)
    try:
        return self.parse_response(opener.open(req))
    except (urllib2.URLError, urllib2.HTTPError) as exc:
        try:
            code = -1
            if exc.code == 400:
                reason = 'Permission denied'
                code = exc.code
            else:
                reason = exc.reason
            msg = "{reason} ({code})".format(reason=reason, code=code)
        except AttributeError:
            if 'SSL' in str(exc):
                msg = "SSL error"
            else:
                msg = str(exc)
        raise InterfaceError("Connection with Fabric failed: " + msg)
    except BadStatusLine:
        raise InterfaceError("Connection with Fabric failed: check SSL")
Source file: webCrawler.py (project: Learning-Concurrency-in-Python, author: PacktPublishing)
def run(self):
    # We create this context so that we can crawl
    # https sites
    myssl = ssl.create_default_context()
    myssl.check_hostname = False
    myssl.verify_mode = ssl.CERT_NONE
    # process all the links in our queue
    while True:
        self.urlLock.acquire()
        print("Queue Size: {}".format(self.linksToCrawl.qsize()))
        link = self.linksToCrawl.get()
        self.urlLock.release()
        # have we reached the end of our queue?
        if link is None:
            break
        # Have we visited this link already?
        if link in self.haveVisited:
            print("Already Visited: {}".format(link))
            break
        try:
            link = urljoin(self.baseUrl, link)
            req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
            response = urlopen(req, context=myssl)
            print("Url {} Crawled with Status: {}".format(response.geturl(), response.getcode()))
            soup = BeautifulSoup(response.read(), "html.parser")
            for atag in soup.find_all('a'):
                if (atag.get('href') not in self.haveVisited) and (urlparse(link).netloc == 'tutorialedge.net'):
                    self.linksToCrawl.put(atag.get('href'))
                else:
                    print("{} already visited or not part of website".format(atag.get('href')))
            print("Adding {} to crawled list".format(link))
            self.haveVisited.append(link)
        except URLError as e:
            print("URL {} threw this error when trying to parse: {}".format(link, e.reason))
            self.errorLinks.append(link)
        finally:
            self.linksToCrawl.task_done()
def lookup(twitter, user_ids):
    """Resolve an entire list of user ids to screen names."""
    users = {}
    api_limit = 100
    for i in range(0, len(user_ids), api_limit):
        fail = Fail()
        while True:
            try:
                portion = lookup_portion(twitter, user_ids[i:][:api_limit])
            except TwitterError as e:
                if e.e.code == 429:
                    err("Fail: %i API rate limit exceeded" % e.e.code)
                    rls = twitter.application.rate_limit_status()
                    reset = rls.rate_limit_reset
                    reset = time.asctime(time.localtime(reset))
                    delay = int(rls.rate_limit_reset
                                - time.time()) + 5  # avoid race
                    err("Interval limit of %i requests reached, next reset on "
                        "%s: going to sleep for %i secs"
                        % (rls.rate_limit_limit, reset, delay))
                    fail.wait(delay)
                    continue
                elif e.e.code == 502:
                    err("Fail: %i Service currently unavailable, retrying..."
                        % e.e.code)
                else:
                    err("Fail: %s\nRetrying..." % str(e)[:500])
                fail.wait(3)
            except urllib2.URLError as e:
                err("Fail: urllib2.URLError %s - Retrying..." % str(e))
                fail.wait(3)
            except httplib.error as e:
                err("Fail: httplib.error %s - Retrying..." % str(e))
                fail.wait(3)
            except KeyError as e:
                err("Fail: KeyError %s - Retrying..." % str(e))
                fail.wait(3)
            else:
                users.update(portion)
                err("Resolving user ids to screen names: %i/%i"
                    % (len(users), len(user_ids)))
                break
    return users
def follow(twitter, screen_name, followers=True):
    """Get the entire list of followers/following for a user."""
    user_ids = []
    cursor = -1
    fail = Fail()
    while True:
        try:
            portion, cursor = follow_portion(twitter, screen_name, cursor,
                                             followers)
        except TwitterError as e:
            if e.e.code == 401:
                reason = ("follow%s of that user are protected"
                          % ("ers" if followers else "ing"))
                err("Fail: %i Unauthorized (%s)" % (e.e.code, reason))
                break
            elif e.e.code == 429:
                err("Fail: %i API rate limit exceeded" % e.e.code)
                rls = twitter.application.rate_limit_status()
                reset = rls.rate_limit_reset
                reset = time.asctime(time.localtime(reset))
                delay = int(rls.rate_limit_reset
                            - time.time()) + 5  # avoid race
                err("Interval limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (rls.rate_limit_limit,
                                                    reset, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            new = -len(user_ids)
            user_ids = list(set(user_ids + portion))
            new += len(user_ids)
            what = "follow%s" % ("ers" if followers else "ing")
            err("Browsing %s %s, new: %i" % (screen_name, what, new))
            if cursor == 0:
                break
            fail = Fail()
    return user_ids
def statuses(twitter, screen_name, tweets, mentions=False, favorites=False, received_dms=None, isoformat=False):
    """Get all the statuses for a screen name."""
    max_id = None
    fail = Fail()
    # get portions of statuses, incrementing max id until no new tweets appear
    while True:
        try:
            portion = statuses_portion(twitter, screen_name, max_id, mentions, favorites, received_dms, isoformat)
        except TwitterError as e:
            if e.e.code == 401:
                err("Fail: %i Unauthorized (tweets of that user are protected)"
                    % e.e.code)
                break
            elif e.e.code == 429:
                err("Fail: %i API rate limit exceeded" % e.e.code)
                rls = twitter.application.rate_limit_status()
                reset = rls.rate_limit_reset
                reset = _time.asctime(_time.localtime(reset))
                delay = int(rls.rate_limit_reset
                            - _time.time()) + 5  # avoid race
                err("Interval limit of %i requests reached, next reset on %s: "
                    "going to sleep for %i secs" % (rls.rate_limit_limit,
                                                    reset, delay))
                fail.wait(delay)
                continue
            elif e.e.code == 404:
                err("Fail: %i This profile does not exist" % e.e.code)
                break
            elif e.e.code == 502:
                err("Fail: %i Service currently unavailable, retrying..."
                    % e.e.code)
            else:
                err("Fail: %s\nRetrying..." % str(e)[:500])
            fail.wait(3)
        except urllib2.URLError as e:
            err("Fail: urllib2.URLError %s - Retrying..." % str(e))
            fail.wait(3)
        except httplib.error as e:
            err("Fail: httplib.error %s - Retrying..." % str(e))
            fail.wait(3)
        except KeyError as e:
            err("Fail: KeyError %s - Retrying..." % str(e))
            fail.wait(3)
        else:
            new = -len(tweets)
            tweets.update(portion)
            new += len(tweets)
            err("Browsing %s statuses, new tweets: %i"
                % (screen_name if screen_name else "home", new))
            if new < 190:
                break
            max_id = min(portion.keys()) - 1  # browse backwards
            fail = Fail()
def status(self):
    status = {
        # summary = 'notfound', 'sleeping', 'on', or 'recording'
        'summary': 'notfound',
        'raw': {}
    }
    camActive = True
    # loop through different status URLs
    for cmd in self.statusMatrix:
        # stop sending requests if a previous request failed
        if camActive:
            url = self._statusURL(cmd)
            # attempt to contact the camera
            try:
                response = urlopen(
                    url, timeout=self.timeout).read().encode('hex')
                status['raw'][cmd] = response  # save raw response
                # loop through different parts we know how to translate
                for item in self.statusMatrix[cmd]:
                    args = self.statusMatrix[cmd][item]
                    if 'a' in args and 'b' in args:
                        part = response[args['a']:args['b']]
                    else:
                        part = response
                    # translate the response value if we know how
                    if 'translate' in args:
                        status[item] = self._translate(
                            args['translate'], part)
                    else:
                        status[item] = part
            except (HTTPError, URLError, socket.timeout) as e:
                logging.warning('{}{} - error opening {}: {}{}'.format(
                    Fore.YELLOW, 'GoProHero.status()', url, e, Fore.RESET))
                camActive = False
    # build summary
    if 'record' in status and status['record'] == 'on':
        status['summary'] = 'recording'
    elif 'power' in status and status['power'] == 'on':
        status['summary'] = 'on'
    elif 'power' in status and status['power'] == 'sleeping':
        status['summary'] = 'sleeping'
    logging.info('GoProHero.status() - result {}'.format(status))
    return status