def retrieveForYear(year):
    r = boto.connect_s3(host="s3.amazonaws.com") \
        .get_bucket("irs-form-990") \
        .get_key("index_%i.json" % year) \
        .get_contents_as_string() \
        .replace("\r", "")
    j = json.loads(r)
    # The index comes back as a single JSON key-value pair whose value is
    # a JSON array of length one. Inside _that_ is an array of filings.
    filings = j.values()[0]
    if cred.prod:
        return filings
    else:
        sample = filings[0:1000]
        return sample
def loadXml(filings):
    session = makeSession()
    s3 = boto.connect_s3(host="s3.amazonaws.com")
    bucket = s3.get_bucket("irs-form-990")
    for filing in filings:
        if filing.URL is None:
            continue
        key_str = filing.URL.split("/")[-1]
        xml_str = key_to_str(bucket, key_str)
        e = RawXML(xml_str, filing)
        e.FormType = filing.FormType
        session.add(e)
    session.commit()
    session.close()
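A minimal driver sketch showing how the two snippets above might be chained, assuming they live in the same module and that cred, makeSession, RawXML, and key_to_str are defined elsewhere in that project; the year range is illustrative.

# Hypothetical driver: fetch the index for each year, then load the XML.
if __name__ == "__main__":
    for year in range(2011, 2017):
        filings = retrieveForYear(year)
        loadXml(filings)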
def upload_to_s3(bucket_name, key_name, video_file):
    cfg = Config()
    # connect to the bucket
    conn = boto.connect_s3(cfg.get("aws", "access_key_id"),
                           cfg.get("aws", "secret_access_key"))
    ret_val = False
    try:
        print("# S3: Uploading to Bucket: {0} / Video|Key: {1}".format(bucket_name, video_file))
        bucket = conn.get_bucket(bucket_name)
        k = Key(bucket)
        if key_name:
            k.key = key_name
        else:
            k.key = os.path.basename(video_file)
        k.set_contents_from_filename(video_file)
        ret_val = True
    except boto.exception.S3ResponseError as err:
        print(err)
    return ret_val
def download_from_s3(bucket_name, key_name, local_out_dir='/tmp'):
    cfg = Config()
    # connect to the bucket
    conn = boto.connect_s3(cfg.get("aws", "access_key_id"),
                           cfg.get("aws", "secret_access_key"))
    ret_val = (False, None)
    try:
        print("# S3: Fetching Bucket: {0} / Key: {1}".format(bucket_name, key_name))
        bucket = conn.get_bucket(bucket_name)
        key = bucket.get_key(key_name)
        if key:
            local_file = os.path.join(local_out_dir, os.path.basename(key_name))
            print("# S3: Saving contents to Local File - {0}".format(local_file))
            key.get_contents_to_filename(local_file, response_headers={
                'response-content-type': 'video/avi'
            })
            ret_val = (True, os.path.abspath(local_file))
    except boto.exception.S3ResponseError as err:
        print(err)
    return ret_val
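Example call mirroring the upload example above; the bucket and key names are placeholders, and the function returns a (success, local_path) tuple.

ok, path = download_from_s3("my-video-bucket", "demo.avi", local_out_dir="/tmp")
if ok:
    print("downloaded to {0}".format(path))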
def delete_keys(bucket_name, key_pattern):
    cfg = Config()
    # connect to the bucket
    conn = boto.connect_s3(cfg.get("aws", "access_key_id"),
                           cfg.get("aws", "secret_access_key"))
    ret_val = True
    try:
        print("# S3: Fetching Keys from Bucket: {0}".format(bucket_name))
        bucket = conn.get_bucket(bucket_name)
        for key in bucket.get_all_keys():
            print(key)
            if os.path.basename(key.name).startswith(key_pattern):
                key.delete()
                print('Deleted {0}'.format(key.name))
    except boto.exception.S3ResponseError as err:
        print(err)
        ret_val = False
    return ret_val
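Note that get_all_keys() returns at most one batch (1000 keys) per call, so the version above only sees the first page of a large bucket. A paging-safe variant using bucket.list(), which iterates over every key, might look like this sketch, reusing the same Config()-based credentials as above.

def delete_keys_iter(bucket_name, key_pattern):
    # Sketch only: bucket.list() pages through all keys automatically.
    cfg = Config()
    conn = boto.connect_s3(cfg.get("aws", "access_key_id"),
                           cfg.get("aws", "secret_access_key"))
    bucket = conn.get_bucket(bucket_name)
    for key in bucket.list():
        if os.path.basename(key.name).startswith(key_pattern):
            key.delete()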
def encode(self, value):
    """
    :type value: file-like object
    :param value: A file-like object containing the content
        of the message. The actual content will be stored
        in S3 and a link to the S3 object will be stored in
        the message body.
    """
    bucket_name, key_name = self._get_bucket_key(self.s3_url)
    if bucket_name and key_name:
        return self.s3_url
    key_name = uuid.uuid4()
    s3_conn = boto.connect_s3()
    s3_bucket = s3_conn.get_bucket(bucket_name)
    key = s3_bucket.new_key(key_name)
    key.set_contents_from_file(value)
    self.s3_url = 's3://%s/%s' % (bucket_name, key_name)
    return self.s3_url
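This encode() is one half of a "claim check" pattern: the payload goes to S3 and only the s3:// URL travels in the queue message. A hedged sketch of the matching decode step, not necessarily the library's exact implementation, assuming the same _get_bucket_key() helper:

def decode(self, value):
    # Given the stored s3:// URL, fetch the real payload back from S3.
    bucket_name, key_name = self._get_bucket_key(value)
    s3_conn = boto.connect_s3()
    s3_bucket = s3_conn.get_bucket(bucket_name)
    key = s3_bucket.get_key(key_name)
    return key.get_contents_as_string()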
def __init__(self):
    super(CopyBot, self).__init__()
    self.wdir = boto.config.get('Pyami', 'working_dir')
    self.log_file = '%s.log' % self.instance_id
    self.log_path = os.path.join(self.wdir, self.log_file)
    boto.set_file_logger(self.name, self.log_path)
    self.src_name = boto.config.get(self.name, 'src_bucket')
    self.dst_name = boto.config.get(self.name, 'dst_bucket')
    self.replace = boto.config.getbool(self.name, 'replace_dst', True)
    s3 = boto.connect_s3()
    self.src = s3.lookup(self.src_name)
    if not self.src:
        boto.log.error('Source bucket does not exist: %s' % self.src_name)
    dest_access_key = boto.config.get(self.name, 'dest_aws_access_key_id', None)
    if dest_access_key:
        dest_secret_key = boto.config.get(self.name, 'dest_aws_secret_access_key', None)
        s3 = boto.connect_s3(dest_access_key, dest_secret_key)
    self.dst = s3.lookup(self.dst_name)
    if not self.dst:
        self.dst = s3.create_bucket(self.dst_name)
def test_s3_save(self):
    with mock_s3_deprecated():
        s3_conn = boto.connect_s3()
        bucket_name = 'fake-matrix-bucket'
        bucket = s3_conn.create_bucket(bucket_name)
        matrix_store_list = self.matrix_store()
        for matrix_store in matrix_store_list:
            matrix_store.save(project_path='s3://fake-matrix-bucket', name='test')
        # HDF
        hdf = HDFMatrixStore(matrix_path='s3://fake-matrix-bucket/test.h5',
                             metadata_path='s3://fake-matrix-bucket/test.yaml')
        # CSV
        csv = CSVMatrixStore(matrix_path='s3://fake-matrix-bucket/test.csv',
                             metadata_path='s3://fake-matrix-bucket/test.yaml')
        assert csv.metadata == matrix_store_list[0].metadata
        assert csv.matrix.to_dict() == matrix_store_list[0].matrix.to_dict()
        assert hdf.metadata == matrix_store_list[0].metadata
        assert hdf.matrix.to_dict() == matrix_store_list[0].matrix.to_dict()
def fopen(file, mode='r'):
    if file is None:
        if mode == 'r':
            return sys.stdin
        elif mode == 'w':
            return sys.stdout
    else:
        # HACK: get boto working with instance credentials via boto3
        match = re.match(s3_regex, file)
        if match is not None:
            client = boto3.client('s3')
            s3_connection = boto.connect_s3(
                aws_access_key_id=client._request_signer._credentials.access_key,
                aws_secret_access_key=client._request_signer._credentials.secret_key,
                security_token=client._request_signer._credentials.token)
            bucket = s3_connection.get_bucket(match.groups()[0])
            if mode == 'w':
                file = bucket.get_key(match.groups()[1], validate=False)
            else:
                file = bucket.get_key(match.groups()[1])
        return smart_open(file, mode=mode)
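Example use, assuming s3_regex is defined in the same module and captures (bucket, key) from an s3:// URL (e.g. r's3://([^/]+)/(.+)'), and that the object exists; process() is a placeholder for the caller's own logic.

with fopen('s3://my-bucket/corpus/part-0000.txt.gz', mode='r') as fh:
    for line in fh:
        process(line)  # placeholder for downstream handling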
def __init__(self):
    ScriptBase.__init__(self)
    self.wdir = boto.config.get('Pyami', 'working_dir')
    self.log_file = '%s.log' % self.instance_id
    self.log_path = os.path.join(self.wdir, self.log_file)
    boto.set_file_logger(self.name, self.log_path)
    self.src_name = boto.config.get(self.name, 'src_bucket')
    self.dst_name = boto.config.get(self.name, 'dst_bucket')
    self.replace = boto.config.getbool(self.name, 'replace_dst', True)
    s3 = boto.connect_s3()
    self.src = s3.lookup(self.src_name)
    if not self.src:
        boto.log.error('Source bucket does not exist: %s' % self.src_name)
    dest_access_key = boto.config.get(self.name, 'dest_aws_access_key_id', None)
    if dest_access_key:
        dest_secret_key = boto.config.get(self.name, 'dest_aws_secret_access_key', None)
        s3 = boto.connect_s3(dest_access_key, dest_secret_key)
    self.dst = s3.lookup(self.dst_name)
    if not self.dst:
        self.dst = s3.create_bucket(self.dst_name)
def destroy_bucket(cls, connection_data, bucket):
    """Destroys the bucket and its content, just for teardown."""
    exc_num = 0
    try:
        with contextlib.closing(
                boto.connect_s3(**connection_data)) as conn:
            if isinstance(bucket, basestring):
                bucket = conn.lookup(bucket)
            assert isinstance(bucket, s3.bucket.Bucket)
            for obj in bucket.list():
                try:
                    bucket.delete_key(obj.key)
                    obj.close()
                except BaseException:
                    LOG.exception("Failed to delete key %s " % obj.key)
                    exc_num += 1
            conn.delete_bucket(bucket)
    except BaseException:
        LOG.exception("Failed to destroy bucket %s " % bucket)
        exc_num += 1
    if exc_num:
        raise exceptions.TearDownException(num=exc_num)
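The cls parameter suggests this is a classmethod on a test base class; a hedged teardown call might look like the following, where BotoTestCase and the credential values are placeholders and connection_data is forwarded as keyword arguments to boto.connect_s3().

connection_data = {
    'aws_access_key_id': 'AKIAEXAMPLE',        # placeholder credential
    'aws_secret_access_key': 'example-secret',  # placeholder credential
}
BotoTestCase.destroy_bucket(connection_data, 'scratch-teardown-bucket')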
def upload_pitr_data(self, db, pitr_data):
    """ Upload a file of PITR data to s3 for each schema

    Args:
    db - the db that was backed up.
    pitr_data - a dict of various data that might be helpful for running a
                PITR
    """
    s3_path = PATH_PITR_DATA.format(
        replica_set=self.instance.get_zk_replica_set()[0],
        date=self.datestamp,
        db_name=db)
    log.debug('{proc_id}: {db} Uploading pitr data to {s3_path}'
              ''.format(s3_path=s3_path,
                        proc_id=multiprocessing.current_process().name,
                        db=db))
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(S3_CSV_BUCKET, validate=False)
    key = bucket.new_key(s3_path)
    key.set_contents_from_string(json.dumps(pitr_data))
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
    db - the db to be backed up
    table - the table to be backed up
    tmp_dir_db - temporary storage used for all tables in the db
    """
    (schema_path, _, _) = get_csv_backup_paths(
        self.datestamp, db, table, self.instance.replica_type,
        self.instance.get_zk_replica_set()[0])
    create_stm = show_create_table(self.instance, db, table)
    log.debug('{proc_id}: Uploading schema to {schema_path}'
              ''.format(schema_path=schema_path,
                        proc_id=multiprocessing.current_process().name))
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(S3_CSV_BUCKET, validate=False)
    key = bucket.new_key(schema_path)
    key.set_contents_from_string(create_stm)
def already_uploaded(instance, binlog, logged_uploads):
    """ Check to see if a binlog has already been uploaded

    Args:
    instance - a hostAddr object
    binlog - the full path to the binlog file
    logged_uploads - a set of all uploaded binlogs for this instance

    Returns True if already uploaded, False otherwise.
    """
    if os.path.basename(binlog) in logged_uploads:
        log.debug('Binlog already logged as uploaded')
        return True

    # we should hit this code rarely, only when uploads have not been logged
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(S3_BINLOG_BUCKET, validate=False)
    if bucket.get_key(s3_binlog_path(instance, os.path.basename(binlog))):
        log.debug("Binlog already uploaded but not logged {b}".format(
            b=binlog))
        log_binlog_upload(instance, binlog)
        return True
    return False
def upload_file(file_path, bucket_name, key_name):
    """
    Upload a file to the given s3 bucket and return a template url.
    """
    conn = boto.connect_s3()
    try:
        bucket = conn.get_bucket(bucket_name)
    except boto.exception.S3ResponseError:
        conn.create_bucket(bucket_name)
        bucket = conn.get_bucket(bucket_name, validate=False)
    key = boto.s3.key.Key(bucket)
    key.key = key_name
    key.set_contents_from_filename(file_path)
    key.set_acl('public-read')
    url = "https://s3.amazonaws.com/{}/{}".format(bucket.name, key.name)
    print("URL: {}".format(url))
    return url
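Example call with hypothetical file, bucket, and key names; the returned URL is publicly readable because the key ACL is set to 'public-read' above.

template_url = upload_file("templates/stack.yaml",
                           "my-cfn-templates",
                           "stack.yaml")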
def _bucket():
    """
    Gets the S3 bucket.
    """
    conn = boto.connect_s3()
    return conn.create_bucket(s3_bucket)
def main():
    with open(answer_key.ANSWER_KEY_PATH, 'rb') as f:
        md5 = hashlib.md5()
        while True:
            buf = f.read(1024)
            if not buf:
                break
            md5.update(buf)
    local_hash = md5.hexdigest()
    s3_conn = boto.connect_s3()
    bucket = s3_conn.get_bucket(BUCKET_NAME)
    key = boto.s3.key.Key(bucket)
    key.key = "risk/{local_hash}/risk-answer-key.xlsx".format(
        local_hash=local_hash)
    key.set_contents_from_filename(answer_key.ANSWER_KEY_PATH)
    key.set_acl('public-read')
    download_link = "http://s3.amazonaws.com/{bucket_name}/{key}".format(
        bucket_name=BUCKET_NAME,
        key=key.key)
    print("Uploaded to key: {key}".format(key=key.key))
    print("Download link: {download_link}".format(download_link=download_link))
    # Now update the checksum file with the recently added answer key.
    # The checksum file update will then need to be committed via git.
    with open(answer_key.ANSWER_KEY_CHECKSUMS_PATH, 'a') as checksum_file:
        checksum_file.write(local_hash)
        checksum_file.write("\n")
def s3connect(user):
    """
    Return an S3 connection
    """
    endpoint = endpoints()[0]
    s3conn = boto.connect_s3(
        aws_access_key_id=access_key(user),
        aws_secret_access_key=secret_key(user),
        host=endpoint['host'],
        is_secure=bool(endpoint['ssl']),
        port=int(endpoint['port']),
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    return s3conn
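OrdinaryCallingFormat forces path-style addressing, which is what most non-AWS, S3-compatible endpoints (for example a local Ceph RGW) expect. The helpers this function relies on are not shown; the stand-ins below are purely illustrative guesses at their shape, not the real project's implementation.

# Illustrative stand-ins for endpoints(), access_key(), and secret_key().
def endpoints():
    return [{'host': 'rgw.example.internal', 'port': 7480, 'ssl': 0}]

def access_key(user):
    return 'EXAMPLEKEY'       # placeholder

def secret_key(user):
    return 'examplesecret'    # placeholder

conn = s3connect('tester')
print(conn.get_all_buckets())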
def write_s3_data():
    """Takes the default data from FakeBucket and writes it to S3.

    Allows running the same tests against fakes and the boto api.
    """
    cfg = get_config()
    bucket = boto.connect_s3(
        cfg['S3_KEY_ID'], cfg['S3_SECRET']).get_bucket(cfg['BUCKET'])
    for name, metadata in FakeBucket.fake_data.iteritems():
        key = bucket.new_key(os.path.join(FakeBucket.rand_prefix, name))
        headers = {("x-amz-meta-" + k): v for k, v in metadata.iteritems()}
        key.set_contents_from_string("spam", headers=headers)
    return bucket