def __init__(
self,
aws_access_key_id=None, # CREDENTIAL
aws_secret_access_key=None, # CREDENTIAL
region=None, # NAME OF AWS REGION, REQUIRED FOR SOME BUCKETS
kwargs=None
):
self.settings = kwargs
try:
if not kwargs.region:
self.connection = boto.connect_s3(
aws_access_key_id=unwrap(self.settings.aws_access_key_id),
aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
)
else:
self.connection = boto.s3.connect_to_region(
self.settings.region,
aws_access_key_id=unwrap(self.settings.aws_access_key_id),
aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
)
except Exception as e:
Log.error("Problem connecting to S3", e)
def test_integration(sample_data):
cfg = get_config()
stream_handler = StreamHandler(sample_data)
bucket = boto.connect_s3(
cfg['S3_KEY_ID'], cfg['S3_SECRET']).get_bucket(cfg['BUCKET'])
key_name = "z3_test_" + datetime.now().strftime("%Y%m%d_%H-%M-%S")
sup = UploadSupervisor(
stream_handler,
key_name,
bucket=bucket,
headers=parse_metadata(["ana=are+mere", "dana=are=pere"])
)
etag = sup.main_loop()
uploaded = bucket.get_key(key_name)
assert etag == '"d229c1fc0e509475afe56426c89d2724-2"'
assert etag == uploaded.etag
assert uploaded.metadata == {"ana": "are+mere", "dana": "are=pere"}
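parse_metadata is not shown in this snippet; based on the assertions above, a helper that splits each pair on the first '=' only would produce the expected metadata. A sketch consistent with the test, not necessarily the project's actual implementation:
def parse_metadata(pairs):
    # Split on the first '=' only, so values containing '=' (e.g. "are=pere")
    # survive intact.
    return dict(pair.split('=', 1) for pair in pairs)

assert parse_metadata(["ana=are+mere", "dana=are=pere"]) == {
    "ana": "are+mere", "dana": "are=pere"}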
def main():
cfg = get_config()
parser = argparse.ArgumentParser(
description='Cleanup hanging multipart s3 uploads',
)
parser.add_argument('--max-age',
dest='max_days',
default=1,
type=int,
help='maximum age in days')
parser.add_argument('--dry',
dest='dry_run',
action='store_true',
help='Don\'t cancel any upload')
args = parser.parse_args()
bucket = boto.connect_s3(
cfg['S3_KEY_ID'], cfg['S3_SECRET']).get_bucket(cfg['BUCKET'])
cleanup_multipart(
bucket,
max_days=args.max_days,
dry_run=args.dry_run,
)
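cleanup_multipart is defined elsewhere in the project; a rough sketch of what it could look like with boto's multipart-upload API, assuming the age check compares each upload's initiated timestamp against a cutoff (the function body and the timestamp parsing are assumptions):
from datetime import datetime, timedelta

def cleanup_multipart(bucket, max_days=1, dry_run=False):
    cutoff = datetime.utcnow() - timedelta(days=max_days)
    for upload in bucket.get_all_multipart_uploads():
        # boto reports the start time as an ISO-8601 string, e.g. "2017-01-10T00:00:00.000Z"
        started = datetime.strptime(upload.initiated, "%Y-%m-%dT%H:%M:%S.%fZ")
        if started < cutoff:
            print("{} {} (started {})".format(
                "would cancel" if dry_run else "cancelling",
                upload.key_name, upload.initiated))
            if not dry_run:
                upload.cancel_upload()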
def test_s3_save(self):
with mock_s3_deprecated():
s3_conn = boto.connect_s3()
bucket_name = 'fake-matrix-bucket'
s3_conn.create_bucket(bucket_name)
matrix_store_list = self.matrix_store()
for matrix_store in matrix_store_list:
matrix_store.save(project_path='s3://fake-matrix-bucket', name='test')
# HDF
hdf = HDFMatrixStore(matrix_path='s3://fake-matrix-bucket/test.h5', metadata_path='s3://fake-matrix-bucket/test.yaml')
# CSV
csv = CSVMatrixStore(matrix_path='s3://fake-matrix-bucket/test.csv', metadata_path='s3://fake-matrix-bucket/test.yaml')
assert csv.metadata == matrix_store_list[0].metadata
assert csv.matrix.to_dict() == matrix_store_list[0].matrix.to_dict()
assert hdf.metadata == matrix_store_list[0].metadata
assert hdf.matrix.to_dict() == matrix_store_list[0].matrix.to_dict()
def to_s3(ident, doc_type):
os.environ['http_proxy'] = 'http://dumont.getty.edu:80'
os.environ['https_proxy'] = 'https://dumont.getty.edu:80'
# Connect to s3 and get bucket
rw = boto.connect_s3(aws_access_key_id=aws_access,
aws_secret_access_key=aws_secret)
b = rw.get_bucket(aws_bucket)
print('{}{}/{}.json'.format(iiif_prezi_base, ident, doc_type))
k = Key(b, '{}{}/{}.json'.format(iiif_prezi_base, ident, doc_type))
if doc_type == 'collection':
print('{}/{}/collection.json'.format(collection_dir, ident))
k.set_contents_from_filename('{}/{}/collection.json'.format(collection_dir, ident))
elif doc_type == 'manifest':
print('{}/{}/manifest.json'.format(manifest_dir, ident))
k.set_contents_from_filename('{}/{}/manifest.json'.format(manifest_dir, ident))
c.execute('INSERT OR REPLACE INTO {}_prezi_docs VALUES (?, ?)'.format(project), (ident, 1))
conn.commit()
print('{} complete and added to DB'.format(ident))
def save_image_to_s3(self):
"""TODO"""
import boto
s3_connection = boto.connect_s3()
bucket = s3_connection.get_bucket('endorsementdb.com')
url = self.get_large_image()
response = requests.get(url, stream=True)
with open('/tmp/profile_image.png', 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
del response
key = bucket.new_key('images/endorsers/%d.png' % self.endorser.pk)
key.set_contents_from_filename(out_file.name)
key.make_public()
def handle(self, *args, **options):
s3_connection = boto.connect_s3()
bucket = s3_connection.get_bucket('endorsementdb.com')
usernames = options['usernames']
for username in usernames:
account = Account.objects.get_from_username(username)
endorser = account.endorser
url = account.get_large_image()
print url, endorser.name
response = requests.get(url, stream=True)
with open('/tmp/profile_image.png', 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
del response
key = bucket.new_key('images/endorsers/%d.png' % endorser.pk)
key.set_contents_from_filename(out_file.name)
key.make_public()
def setUp(self):
"""
Set up a mock S3 connection, bucket, and key, using moto.
"""
self.bucket_name = 's3storagesdrivertest'
conn = boto.connect_s3()
# We need to create the bucket since this is all in Moto's 'virtual' AWS account
conn.create_bucket(self.bucket_name)
self.bucket = conn.get_bucket(self.bucket_name)
key = self.bucket.new_key('the_dag/the_task/1983-09-05')
data = 'this is a test.'
key.set_metadata('Content-Type', 'text/plain')
key.set_contents_from_string(data)
key.set_acl('private')
self.driver = S3StorageDriver('', '', self.bucket_name)
def __init__(self, access_key_id, secret_access_key, bucket_name):
"""
Set up the credentials and bucket name.
:param str access_key_id: AWS credentials.
:param str secret_access_key: AWS credentials.
:param str bucket_name: The S3 bucket to use.
"""
super(S3StorageDriver, self).__init__()
self.bucket_name = bucket_name
self.s3 = boto.connect_s3(
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key
)
self.bucket = self.s3.get_bucket(self.bucket_name)
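A brief usage sketch for the driver above; the key path mirrors the setUp test earlier, and the credentials and bucket name are placeholders:
# Hypothetical usage; real credentials would come from configuration.
driver = S3StorageDriver('AKIA...', '...', 's3storagesdrivertest')
key = driver.bucket.get_key('the_dag/the_task/1983-09-05')
print(key.get_contents_as_string())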
def _CLEANUP(self):
"""
check for workflow simplification
"""
if self.auth_dict['veda_deliverable_bucket'] == \
self.auth_dict['edx_s3_endpoint_bucket']:
return
try:
conn = boto.connect_s3()
except S3ResponseError:
return
del_bucket = conn.get_bucket(
self.auth_dict['veda_deliverable_bucket']
)
k = Key(del_bucket)
k.key = self.encoded_file
k.delete()
def about_video_ingest(self):
"""
Crawl VEDA Upload bucket
"""
if self.node_work_directory is None:
print '[Discovery Error] No Workdir'
return
try:
conn = boto.connect_s3()
except NoAuthHandlerFound:
print '[Discovery Error] BOTO Auth Handler'
return
try:
self.bucket = conn.get_bucket(self.auth_dict['veda_s3_upload_bucket'])
except S3ResponseError:
return None
for key in self.bucket.list('upload/', '/'):
meta = self.bucket.get_key(key.name)
if meta.name != 'upload/':
self.about_video_validate(
meta=meta,
key=key
)
def discover_studio_ingested_videos(self):
"""
Discovers studio ingested videos, for further validations and processes.
"""
if self.node_work_directory:
try:
connection = boto.connect_s3()
self.bucket = connection.get_bucket(self.auth_dict['edx_s3_ingest_bucket'])
for video_s3_key in self.bucket.list(self.auth_dict['edx_s3_ingest_prefix'], '/'):
if video_s3_key.name != 'prod-edx/unprocessed/':
self.validate_metadata_and_feed_to_ingest(video_s3_key=self.bucket.get_key(video_s3_key.name))
except S3ResponseError:
ErrorObject.print_error(message='[File Ingest] S3 Ingest Connection Failure')
except NoAuthHandlerFound:
ErrorObject.print_error(message='[Discovery Error] BOTO Auth Handler')
else:
ErrorObject.print_error(message='[File Ingest] No Working Node directory')
def setUp(self):
"""Creates a named load balancer that can be safely
deleted at the end of each test"""
self.conn = ELBConnection()
self.name = 'elb-boto-unit-test'
self.availability_zones = ['us-east-1a']
self.listeners = [(80, 8000, 'HTTP')]
self.balancer = self.conn.create_load_balancer(
self.name, self.availability_zones, self.listeners)
# S3 bucket for log tests
self.s3 = boto.connect_s3()
self.timestamp = str(int(time.time()))
self.bucket_name = 'boto-elb-%s' % self.timestamp
self.bucket = self.s3.create_bucket(self.bucket_name)
self.bucket.set_canned_acl('public-read-write')
self.addCleanup(self.cleanup_bucket, self.bucket)
def test_storage_uri_regionless(self):
# First, create a bucket in a different region.
conn = S3Connection(
host='s3-us-west-2.amazonaws.com'
)
bucket_name = 'keytest-%d' % int(time.time())
bucket = conn.create_bucket(bucket_name, location=Location.USWest2)
self.addCleanup(self.nuke_bucket, bucket)
    # Now use ``storage_uri`` to make a new key; without regionless
    # support this would raise a 301 (redirect) error.
suri = boto.storage_uri('s3://%s/test' % bucket_name)
the_key = suri.new_key()
the_key.key = 'Test301'
the_key.set_contents_from_string(
'This should store in a different region.'
)
# Check it a different way.
alt_conn = boto.connect_s3(host='s3-us-west-2.amazonaws.com')
alt_bucket = alt_conn.get_bucket(bucket_name)
alt_key = alt_bucket.get_key('Test301')
def encode(self, value):
"""
:type value: file-like object
:param value: A file-like object containing the content
of the message. The actual content will be stored
in S3 and a link to the S3 object will be stored in
the message body.
"""
bucket_name, key_name = self._get_bucket_key(self.s3_url)
if bucket_name and key_name:
return self.s3_url
key_name = uuid.uuid4()
s3_conn = boto.connect_s3()
s3_bucket = s3_conn.get_bucket(bucket_name)
key = s3_bucket.new_key(key_name)
key.set_contents_from_file(value)
self.s3_url = 's3://%s/%s' % (bucket_name, key_name)
return self.s3_url
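The matching decode step is not included in this snippet; a plausible counterpart would read the bucket and key back out of the stored s3:// URL and return the object's contents. This is a sketch, not the library's actual implementation:
def decode(self, value):
    # value is the s3://bucket/key URL produced by encode() above
    bucket_name, key_name = self._get_bucket_key(value)
    s3_conn = boto.connect_s3()
    s3_bucket = s3_conn.get_bucket(bucket_name)
    key = s3_bucket.get_key(key_name)
    return key.get_contents_as_string()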
def __init__(self):
super(CopyBot, self).__init__()
self.wdir = boto.config.get('Pyami', 'working_dir')
self.log_file = '%s.log' % self.instance_id
self.log_path = os.path.join(self.wdir, self.log_file)
boto.set_file_logger(self.name, self.log_path)
self.src_name = boto.config.get(self.name, 'src_bucket')
self.dst_name = boto.config.get(self.name, 'dst_bucket')
self.replace = boto.config.getbool(self.name, 'replace_dst', True)
s3 = boto.connect_s3()
self.src = s3.lookup(self.src_name)
if not self.src:
boto.log.error('Source bucket does not exist: %s' % self.src_name)
dest_access_key = boto.config.get(self.name, 'dest_aws_access_key_id', None)
if dest_access_key:
dest_secret_key = boto.config.get(self.name, 'dest_aws_secret_access_key', None)
        s3 = boto.connect_s3(dest_access_key, dest_secret_key)
self.dst = s3.lookup(self.dst_name)
if not self.dst:
self.dst = s3.create_bucket(self.dst_name)
def test_field_value_counter():
counter = FieldValueCounter(quarter='2014Q1', field_values=['jobtitle', 'jobdesc'])
counter.track(
input_document={'jobtitle': 'test', 'jobdesc': 'test'},
)
counter.track(
input_document={'jobtitle': 'test', 'jobdesc': '', 'extra': 'test'},
)
assert counter.accumulator['jobtitle']['test'] == 2
assert counter.accumulator['jobdesc']['test'] == 1
assert counter.accumulator['jobdesc'][''] == 1
with moto.mock_s3():
s3_conn = boto.connect_s3()
s3_conn.create_bucket('test-bucket')
counter.save(s3_conn, 'test-bucket/stats')
key = s3_conn.get_bucket('test-bucket')\
.get_key('stats/field_values/2014Q1/jobtitle.csv')
expected_count = 'test,2'
assert key.get_contents_as_string().decode('utf-8').rstrip() == expected_count
# Source: test_transform_dataset_stats.py (project: skills-ml, author: workforce-data-initiative)
def test_dataset_stats_counter_empty():
counter = DatasetStatsCounter(quarter='2013Q1', dataset_id='VA')
with moto.mock_s3():
with freeze_time('2017-01-10'):
s3_conn = boto.connect_s3()
s3_conn.create_bucket('test-bucket')
counter.save(s3_conn, 'test-bucket/stats')
key = s3_conn.get_bucket('test-bucket')\
.get_key('stats/quarterly/VA_2013Q1')
expected_stats = {
'total': 0,
'output_counts': {},
'input_counts': {},
'output_percentages': {},
'input_percentages': {},
'last_updated': '2017-01-10T00:00:00',
'quarter': '2013Q1',
}
assert json.loads(key.get_contents_as_string().decode('utf-8')) == expected_stats
# Source: test_transform_dataset_stats.py (project: skills-ml, author: workforce-data-initiative)
def test_dataset_stats_aggregator():
with moto.mock_s3():
s3_conn = boto.connect_s3()
aggregator = DatasetStatsAggregator(dataset_id='CB', s3_conn=s3_conn)
add_s3_content(
s3_conn,
{
'test-bucket/stats/quarterly/CB_2014Q1':
json.dumps(sample_quarter_stats('2014Q1')),
'test-bucket/stats/quarterly/CB_2014Q2':
json.dumps(sample_quarter_stats('2014Q2')),
'test-bucket/stats/quarterly/VA_2014Q1':
json.dumps(sample_quarter_stats('2014Q1')),
}
)
with freeze_time('2017-01-10'):
aggregator.run('test-bucket/stats')
expected_stats = sample_dataset_stats()
key = s3_conn.get_bucket('test-bucket')\
.get_key('stats/dataset_summaries/CB.json')
assert json.loads(key.get_contents_as_string().decode('utf-8')) == expected_stats
# Source: test_transform_dataset_stats.py (project: skills-ml, author: workforce-data-initiative)
def test_total_job_postings():
with moto.mock_s3():
s3_conn = boto.connect_s3()
s3_conn.create_bucket('stats-bucket')
bucket = s3_conn.get_bucket('stats-bucket')
key = boto.s3.key.Key(
bucket=bucket,
name='partner-etl/summary.json'
)
key.set_contents_from_string(json.dumps({
'total': 8,
'output_counts': {
'title': 8,
'description': 4
},
'output_percentages': {
'title': 1.0,
'description': 0.5
},
'last_updated': '2017-01-10T00:00:00',
}))
assert GlobalStatsAggregator(s3_conn)\
.saved_total(config['partner_stats']['s3_path']) == 8
# Source: test_transform_dataset_stats.py (project: skills-ml, author: workforce-data-initiative)
def test_quarterly_posting_stats():
with moto.mock_s3():
s3_conn = boto.connect_s3()
s3_conn.create_bucket('stats-bucket')
bucket = s3_conn.get_bucket('stats-bucket')
upload_quarterly_dataset_counts(bucket, 'XX', '2014Q1', 5)
upload_quarterly_dataset_counts(bucket, 'XX', '2014Q2', 6)
upload_quarterly_dataset_counts(bucket, 'XX', '2014Q3', 7)
upload_quarterly_dataset_counts(bucket, 'XX', '2014Q4', 8)
upload_quarterly_dataset_counts(bucket, 'ZZ', '2014Q1', 10)
upload_quarterly_dataset_counts(bucket, 'ZZ', '2014Q2', 9)
upload_quarterly_dataset_counts(bucket, 'ZZ', '2014Q3', 8)
upload_quarterly_dataset_counts(bucket, 'ZZ', '2014Q4', 10)
assert DatasetStatsCounter.quarterly_posting_stats(
s3_conn,
config['partner_stats']['s3_path']
) == {
'2014Q1': 15,
'2014Q2': 15,
'2014Q3': 15,
'2014Q4': 18
}
def test_cbsa_finder_nohits():
s3_conn = boto.connect_s3()
s3_conn.create_bucket('geobucket')
shapefile_name = 'tests/sample_cbsa_shapefile.shp'
finder = S3CachedCBSAFinder(
s3_conn=s3_conn,
cache_s3_path='geobucket/cbsas.json',
shapefile_name=shapefile_name
)
sample_input = {
"bbox": {
"northeast": [65.2, 65.8],
"southwest": [65.2, 65.8]
},
}
    assert finder.query(sample_input) is None
def test_cbsa_finder_twohits():
s3_conn = boto.connect_s3()
s3_conn.create_bucket('geobucket')
shapefile_name = 'tests/sample_cbsa_shapefile.shp'
finder = S3CachedCBSAFinder(
s3_conn=s3_conn,
cache_s3_path='geobucket/cbsas.json',
shapefile_name=shapefile_name
)
sample_input = {
"bbox": {
"northeast": [38.00, -81.05],
"southwest": [35.13, -88.18]
},
}
assert finder.query(sample_input) == (
'40080',
'Richmond-Berea, KY Micro Area',
)
# Source: test_datasets_job_postings.py (project: skills-ml, author: workforce-data-initiative)
def test_job_postings():
s3_conn = boto.connect_s3()
bucket_name = 'test-bucket'
path = 'postings'
quarter = '2014Q1'
bucket = s3_conn.create_bucket(bucket_name)
for i in range(0, 2):
key = boto.s3.key.Key(
bucket=bucket,
name='{}/{}/{}'.format(path, quarter, i)
)
key.set_contents_from_string('test')
# both variants of job postings getter should have identical results
for func in [job_postings, job_postings_highmem]:
postings = [posting for posting in func(
s3_conn,
quarter,
'{}/{}'.format(bucket_name, path)
)]
assert postings == ['test'] * 2
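job_postings and job_postings_highmem are not shown here; given the key layout the test creates, a generator along these lines would satisfy the assertion (an illustrative sketch, not the project's actual code):
def job_postings_sketch(s3_conn, quarter, s3_path):
    # s3_path is "bucket-name/prefix"; postings live under prefix/quarter/
    bucket_name, prefix = s3_path.split('/', 1)
    bucket = s3_conn.get_bucket(bucket_name)
    for key in bucket.list(prefix='{}/{}/'.format(prefix, quarter)):
        yield key.get_contents_as_string().decode('utf-8')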
# Source: csv_loader_for_redshift.py (project: CSV_Loader_For_Redshift, author: alexbuz)
def main(transfer_file, bucket_name, s3_key_name=None, use_rr=False,
make_public=True):
global bucket
    # derive the S3 key name from the file name if one wasn't supplied
    if not s3_key_name:
        s3_key_name = os.path.basename(transfer_file)
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
bucket = conn.get_bucket(bucket_name)
file_handle = open(transfer_file, 'rb')
k = Key(bucket)
k.key = s3_key_name
k.set_contents_from_file(file_handle, cb=progress, num_cb=20, reduced_redundancy=use_rr )
if make_public:
k.make_public()
return '/'.join((bucket_name, s3_key_name))
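The progress callback passed as cb=progress is not shown; boto invokes it with the number of bytes transmitted so far and the total size, so a minimal version could look like this (an assumption, not part of the original script):
import sys

def progress(bytes_transmitted, total_bytes):
    # Called up to num_cb times during the transfer.
    sys.stdout.write('\r{} / {} bytes transferred'.format(bytes_transmitted, total_bytes))
    sys.stdout.flush()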
def setGridList(self, year,month,day):
s3conn = boto.connect_s3()
bucket = s3conn.get_bucket('noaa-nexrad-level2')
    keys = bucket.list(prefix=year + '/' + month + '/' + day + '/',
                       delimiter='/')
tmp = []
for key in keys:
tmp.append(key.name.split('/')[-2])
self.gridList = tmp
    if self.grid not in self.gridList:
        print("The site selected is not available for " + year
              + ' ' + month + '/' + day + '. The site has defaulted to: ' +
              self.gridList[0] +
              '. Please re-select the site you would like to view')
self.selectionChangeHour(0)
self.selectionChangeMMSS(0)
self.selectionChangeGrid(0)
else:
self.currentGridIndex = np.where(np.array(self.gridList) == self.grid)[0][0]
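The noaa-nexrad-level2 bucket is public, so the same site listing can be reproduced anonymously outside the class; a small standalone sketch (the date values in the comment are examples):
import boto

def list_nexrad_sites(year, month, day):
    # Anonymous access is enough for the public NEXRAD archive bucket.
    conn = boto.connect_s3(anon=True)
    bucket = conn.get_bucket('noaa-nexrad-level2', validate=False)
    keys = bucket.list(prefix='{}/{}/{}/'.format(year, month, day), delimiter='/')
    return [key.name.split('/')[-2] for key in keys]

# e.g. list_nexrad_sites('2016', '06', '15')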
def __init__(self):
ScriptBase.__init__(self)
self.wdir = boto.config.get('Pyami', 'working_dir')
self.log_file = '%s.log' % self.instance_id
self.log_path = os.path.join(self.wdir, self.log_file)
boto.set_file_logger(self.name, self.log_path)
self.src_name = boto.config.get(self.name, 'src_bucket')
self.dst_name = boto.config.get(self.name, 'dst_bucket')
self.replace = boto.config.getbool(self.name, 'replace_dst', True)
s3 = boto.connect_s3()
self.src = s3.lookup(self.src_name)
if not self.src:
boto.log.error('Source bucket does not exist: %s' % self.src_name)
dest_access_key = boto.config.get(self.name, 'dest_aws_access_key_id', None)
if dest_access_key:
dest_secret_key = boto.config.get(self.name, 'dest_aws_secret_access_key', None)
        s3 = boto.connect_s3(dest_access_key, dest_secret_key)
self.dst = s3.lookup(self.dst_name)
if not self.dst:
self.dst = s3.create_bucket(self.dst_name)