def download(url, local, **kwargs):
    if not local:
        raise ValueError('local filepath is empty')
    try:
        # Create the destination directory if it does not exist yet.
        if not os.path.exists(os.path.dirname(local)):
            os.makedirs(os.path.dirname(local))
        res = Request(url, **kwargs)
        read_size = 0
        real_size = int(res.header['content-length'])
        with open(local, 'wb') as f:
            while True:
                block = res.response.read(1024 * 8)
                if not block:
                    break
                f.write(block)
                read_size += len(block)
        # Compare the bytes actually read against the advertised size.
        if read_size < real_size:
            raise urllib.ContentTooShortError(
                'retrieval incomplete: got only {} out of {} bytes'.format(
                    read_size, real_size),
                None)
    except Exception as e:
        raise e
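This helper needs import os and import urllib, and it leans on a project-specific Request wrapper (not urllib's) that is assumed to expose the response headers as res.header and a file-like body as res.response. A hedged usage sketch:

# Hypothetical call; any keyword arguments are passed straight through
# to the project's Request wrapper, so their names depend on it.
download('https://example.com/files/archive.tar.gz',
         '/tmp/downloads/archive.tar.gz')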
def setup_os_fingerprints(os_file):
    """Download nmap-os-db from the internet if not found at the given location.

    Args:
        os_file: location of the nmap-os-db file
    """
    if not os.path.isfile(os_file):
        try:
            logger.info('Retrieving Nmap fingerprint database')
            urllib.urlretrieve(
                "https://svn.nmap.org/nmap/nmap-os-db",
                os.path.join(
                    package_directory,
                    "templates/nmap-os-db"))
        except urllib.ContentTooShortError:
            logger.exception('Connection interrupted: nmap-os-db retrieval failed')
            sys.exit(1)
def setup_mac_prefix(mac_file):
    """Download nmap-mac-prefixes from the internet if not found at the given location.

    Args:
        mac_file: location of the nmap-mac-prefixes file
    """
    if not os.path.isfile(mac_file):
        try:
            logger.info('Retrieving Nmap MAC prefix database')
            urllib.urlretrieve(
                "https://svn.nmap.org/nmap/nmap-mac-prefixes",
                os.path.join(
                    package_directory,
                    "templates/nmap-mac-prefixes"))
        except urllib.ContentTooShortError:
            logger.exception('Connection interrupted: nmap-mac-prefixes retrieval failed')
            sys.exit(1)
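Both helpers use the Python 2 urllib API; under Python 3 the same calls move to urllib.request.urlretrieve and urllib.error.ContentTooShortError. A minimal sketch of the same pattern (fetch_nmap_db is a hypothetical name; logger is assumed to be configured as in the helpers above):

import sys
import urllib.error
import urllib.request

def fetch_nmap_db(url, destination):
    # Mirror the error handling of the Python 2 helpers above:
    # a short read is treated as fatal.
    try:
        urllib.request.urlretrieve(url, destination)
    except urllib.error.ContentTooShortError:
        logger.exception('Connection interrupted: %s retrieval failed', url)
        sys.exit(1)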
def test_short_content_raises_ContentTooShortError(self):
    self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

    def _reporthook(par1, par2, par3):
        pass

    try:
        self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                          'http://example.com', reporthook=_reporthook)
    finally:
        self.unfakehttp()
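The fixture serves a canned response whose Content-Length header (100) exceeds the actual body ("FF"), so urlretrieve must notice the short read and raise. The check it exercises boils down to the following sketch (not CPython's exact code; shown with the Python 3 location of the exception):

import urllib.error

read_size, expected = 2, 100  # bytes received ("FF") vs. advertised Content-Length
if read_size < expected:
    raise urllib.error.ContentTooShortError(
        'retrieval incomplete: got only %i out of %i bytes'
        % (read_size, expected),
        b'FF')  # the partial payload travels on the exception's .content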
def download_csv(ticker_name, period_start, period_end, frequency='daily'):
    url = BASE_URL + "{}&a={}&b={}&c={}&d={}&e={}&f={}&g={}&ignore=.csv".format(
        ticker_name,
        period_start.month - 1,
        period_start.day,
        period_start.year,
        period_end.month - 1,
        period_end.day,
        period_end.year,
        FREQUENCY[frequency])
    path = CSV_PATH_PREFIX
    if not os.path.exists(path):
        os.makedirs(path)
    filename = "{}{}.csv".format(path, ticker_name)
    try:
        urllib.request.urlretrieve(url, filename)
    except urllib.error.ContentTooShortError as e:
        # Keep whatever arrived; the partial payload rides on e.content.
        with open(filename, 'wb') as f:
            f.write(e.content)
    finally:
        # Returning from finally means the filename is always returned,
        # even if an unrelated exception was raised above.
        return filename
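ContentTooShortError exposes the bytes that did arrive on its content attribute, which is what the except branch writes out, so a partial CSV is still saved and returned. A hedged usage sketch; the constants are hypothetical stand-ins for the module-level BASE_URL, CSV_PATH_PREFIX and FREQUENCY the function assumes:

from datetime import date

BASE_URL = "https://ichart.example.com/table.csv?s="       # hypothetical
CSV_PATH_PREFIX = "./csv/"                                 # hypothetical
FREQUENCY = {'daily': 'd', 'weekly': 'w', 'monthly': 'm'}  # hypothetical

csv_file = download_csv('AAPL', date(2016, 1, 4), date(2016, 12, 30))
print(csv_file)  # ./csv/AAPL.csv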
def download_single_img(img_url):
    print img_url
    try:
        urllib.urlretrieve(img_url, './img/' + os.path.basename(img_url))
    except urllib.ContentTooShortError as e:
        print e
        try:  # retry the download once
            urllib.urlretrieve(img_url, './img/' + os.path.basename(img_url))
        except urllib.ContentTooShortError as e:
            print e
            print 'secondError: ' + img_url
def mirror_modis_dates_html(base_url, mirror_dir, use_wget=False):
    """
    Download all MODIS date listing pages to a local directory.

    Usually, a MODIS listing for a date should not change (only new dates
    should be added), so there should be no need to re-download.
    """
    ndownloads = 0
    dates_urls = collect_all_dates_pages(base_url)
    utils.mkdir_p(mirror_dir)
    for date, url in dates_urls:
        fname = os.path.join(mirror_dir, date + '.html')
        if not os.path.exists(fname):
            print 'Downloading ', fname
            if use_wget:
                subprocess.check_call('/usr/bin/wget %s -O %s' % (url, fname),
                                      shell=True)
            else:
                urllib.urlretrieve(url, fname)
            ndownloads += 1
            # The MODIS MOLT repository server doesn't return Content-Length,
            # so urllib cannot tell if it downloaded the whole html or was
            # just disconnected, which could lead to incomplete HTML being
            # downloaded. So we check if the downloaded file ends with </html>.
            with open(fname, 'r') as f:
                # seek 10 bytes from the end
                f.seek(-10, 2)
                line = f.read(10)
            if "</html>" not in line:
                raise urllib.ContentTooShortError(
                    "Couldn't find </html> in downloaded file, probably a partial download", ""
                )
            # Just avoid firing requests as fast as possible
            time.sleep(0.1)
    return ndownloads > 0
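The tail check above can be lifted into a small helper; a sketch with a hypothetical name (files shorter than ten bytes aside). Opening the file in binary mode also sidesteps Python 3's restriction on relative seeks in text mode:

def looks_complete(path, marker=b'</html>'):
    # True when the tail of the downloaded page contains the closing tag,
    # i.e. the download probably was not truncated.
    with open(path, 'rb') as f:
        f.seek(-10, 2)  # 10 bytes back from the end of the file
        return marker in f.read(10)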
def test_short_content_raises_ContentTooShortError_without_reporthook(self):
    self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
    try:
        self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve, 'http://example.com/')
    finally:
        self.unfakehttp()
def auto_down(url, filename):
    try:
        urllib.urlretrieve(url, filename)
    except urllib.ContentTooShortError:
        print 'Network conditions are not good.\nReloading.'
        auto_down(url, filename)
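The recursion above has no depth limit, so a persistently flaky link could exhaust the interpreter's recursion limit. A bounded, iterative variant (a sketch with a hypothetical name, written against the Python 3 urllib locations):

import urllib.error
import urllib.request

def auto_down_bounded(url, filename, max_retries=3):
    # Retry a capped number of times instead of recursing indefinitely.
    for attempt in range(max_retries):
        try:
            urllib.request.urlretrieve(url, filename)
            return filename
        except urllib.error.ContentTooShortError:
            print('Network conditions are not good. Retrying (%d/%d).'
                  % (attempt + 1, max_retries))
    raise urllib.error.ContentTooShortError(
        'giving up on %s after %d attempts' % (url, max_retries), b'')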