From f912b9efee5989f6263314bd448444fcb0ba27e7 Mon Sep 17 00:00:00 2001
From: Henri Blancke
Date: Thu, 23 Jul 2020 07:44:34 -0400
Subject: [PATCH] Support ACL for S3 uploads (#89)

---
 .gitignore                    |  4 +++-
 README.md                     |  6 ++++--
 setup.py                      |  3 ++-
 target_snowflake/db_sync.py   | 10 +++++++---
 tests/integration/.env.sample | 15 +++++++++++++++
 tests/integration/utils.py    |  3 +++
 6 files changed, 34 insertions(+), 7 deletions(-)
 create mode 100644 tests/integration/.env.sample

diff --git a/.gitignore b/.gitignore
index 26616581..36d6c9f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,6 @@
 .vscode
 .idea/*
 
-
 # Python
 __pycache__/
 *.py[cod]
@@ -30,3 +29,6 @@ tmp
 # Docs
 docs/_build/
 docs/_templates/
+
+# Environment
+.env
diff --git a/README.md b/README.md
index 4f8b1cdc..a2e808f0 100644
--- a/README.md
+++ b/README.md
@@ -134,8 +134,9 @@ Full list of options in `config.json`:
 | aws_access_key_id | String | No | S3 Access Key Id. If not provided, AWS_ACCESS_KEY_ID environment variable or IAM role will be used |
 | aws_secret_access_key | String | No | S3 Secret Access Key. If not provided, AWS_SECRET_ACCESS_KEY environment variable or IAM role will be used |
 | aws_session_token | String | No | AWS Session token. If not provided, AWS_SESSION_TOKEN environment variable will be used |
+| s3_acl | String | No | (Default: None) S3 ACL name to apply to uploaded files (e.g. `bucket-owner-full-control`) |
 | s3_bucket | String | Yes | S3 Bucket name |
-| s3_key_prefix | String | | (Default: None) A static prefix before the generated S3 key names. Using prefixes you can upload files into specific directories in the S3 bucket. |
+| s3_key_prefix | String | No | (Default: None) A static prefix before the generated S3 key names. Using prefixes you can upload files into specific directories in the S3 bucket. |
 | stage | String | Yes | Named external stage name created at pre-requirements section. Has to be a fully qualified name including the schema name |
 | file_format | String | Yes | Named file format name created at pre-requirements section. Has to be a fully qualified name including the schema name. |
 | batch_size_rows | Integer | | (Default: 100000) Maximum number of rows in each batch. At the end of each batch, the rows in the batch are loaded into Snowflake. |
@@ -158,7 +159,7 @@ Full list of options in `config.json`:
 
 ### To run tests:
 
-1. Define environment variables that requires running the tests
+1. Define the environment variables that are required to run the tests by creating a `.env` file in `tests/integration`, or by exporting the variables below.
 ```
   export TARGET_SNOWFLAKE_ACCOUNT=
   export TARGET_SNOWFLAKE_DBNAME=
@@ -168,6 +169,7 @@ Full list of options in `config.json`:
   export TARGET_SNOWFLAKE_SCHEMA=
   export TARGET_SNOWFLAKE_AWS_ACCESS_KEY=
   export TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY=
+  export TARGET_SNOWFLAKE_S3_ACL=
   export TARGET_SNOWFLAKE_S3_BUCKET=
   export TARGET_SNOWFLAKE_S3_KEY_PREFIX=
   export TARGET_SNOWFLAKE_STAGE=
diff --git a/setup.py b/setup.py
index 36785537..921df386 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,8 @@
         "test": [
             "nose==1.3.7",
             "mock==3.0.5",
-            "pylint==2.4.2"
+            "pylint==2.4.2",
+            "python-dotenv==0.14.0"
         ]
     },
     entry_points="""
diff --git a/target_snowflake/db_sync.py b/target_snowflake/db_sync.py
index a9aa4e84..02896b28 100644
--- a/target_snowflake/db_sync.py
+++ b/target_snowflake/db_sync.py
@@ -371,6 +371,7 @@ def put_to_stage(self, file, stream, count, temp_dir=None):
 
         # Generating key in S3 bucket
         bucket = self.connection_config['s3_bucket']
+        s3_acl = self.connection_config.get('s3_acl')
         s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
         s3_key = "{}pipelinewise_{}_{}.csv".format(s3_key_prefix, stream,
                                                    datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))
@@ -392,19 +393,22 @@ def put_to_stage(self, file, stream, count, temp_dir=None):
             )
 
             # Upload to s3
+            extra_args = {'ACL': s3_acl} if s3_acl else dict()
+
             # Send key and iv in the metadata, that will be required to decrypt and upload the encrypted file
-            metadata = {
+            extra_args['Metadata'] = {
                 'x-amz-key': encryption_metadata.key,
                 'x-amz-iv': encryption_metadata.iv
             }
-            self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs={'Metadata': metadata})
+            self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)
 
             # Remove the uploaded encrypted file
             os.remove(encrypted_file)
 
         # Upload to S3 without encrypting
         else:
-            self.s3.upload_file(file, bucket, s3_key)
+            extra_args = {'ACL': s3_acl} if s3_acl else None
+            self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)
 
         return s3_key
 
diff --git a/tests/integration/.env.sample b/tests/integration/.env.sample
new file mode 100644
index 00000000..6a41e290
--- /dev/null
+++ b/tests/integration/.env.sample
@@ -0,0 +1,15 @@
+TARGET_SNOWFLAKE_ACCOUNT=
+TARGET_SNOWFLAKE_DBNAME=
+TARGET_SNOWFLAKE_USER=
+TARGET_SNOWFLAKE_PASSWORD=
+TARGET_SNOWFLAKE_WAREHOUSE=
+TARGET_SNOWFLAKE_SCHEMA=
+TARGET_SNOWFLAKE_AWS_ACCESS_KEY=
+TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY=
+TARGET_SNOWFLAKE_S3_ACL=
+TARGET_SNOWFLAKE_S3_BUCKET=
+TARGET_SNOWFLAKE_S3_KEY_PREFIX=
+TARGET_SNOWFLAKE_STAGE=
+TARGET_SNOWFLAKE_FILE_FORMAT=
+CLIENT_SIDE_ENCRYPTION_MASTER_KEY=
+CLIENT_SIDE_ENCRYPTION_STAGE_OBJECT=
diff --git a/tests/integration/utils.py b/tests/integration/utils.py
index b9131404..46a73fce 100644
--- a/tests/integration/utils.py
+++ b/tests/integration/utils.py
@@ -1,6 +1,8 @@
 import os
 import json
 
+from dotenv import load_dotenv
+load_dotenv()
 
 def get_db_config():
     config = {}
@@ -26,6 +28,7 @@ def get_db_config():
     config['aws_secret_access_key'] = os.environ.get('TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY')
     config['s3_bucket'] = os.environ.get('TARGET_SNOWFLAKE_S3_BUCKET')
     config['s3_key_prefix'] = os.environ.get('TARGET_SNOWFLAKE_S3_KEY_PREFIX')
+    config['s3_acl'] = os.environ.get('TARGET_SNOWFLAKE_S3_ACL')
 
     # External stage in snowflake with client side encryption details
     config['client_side_encryption_master_key'] = os.environ.get('CLIENT_SIDE_ENCRYPTION_MASTER_KEY')
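
The core of the change is the conditional `ExtraArgs` dict passed to boto3's `upload_file` in `put_to_stage`. The following is a minimal standalone sketch of that pattern, not part of the patch: the bucket, key, and ACL values are placeholders, and it assumes boto3 is installed with AWS credentials available from the environment.

```
import boto3

# Mirror the patched put_to_stage logic: only pass an ACL when one is
# configured. boto3 rejects a None ACL value during parameter validation,
# so the conditional is required; ExtraArgs=None means "no extra args".
s3_acl = 'bucket-owner-full-control'  # placeholder; the target reads config['s3_acl']
extra_args = {'ACL': s3_acl} if s3_acl else None

s3 = boto3.client('s3')
s3.upload_file('rows.csv', 'my-bucket', 'prefix/rows.csv', ExtraArgs=extra_args)
```

In the client-side-encryption branch the patch starts from `dict()` rather than `None` because that branch always attaches the encryption key and IV as object metadata, so an `ExtraArgs` dict is sent either way.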
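
To confirm that a configured ACL was actually applied, the object's grants can be read back with the standard `get_object_acl` call; again a sketch with placeholder names:

```
import boto3

s3 = boto3.client('s3')

# Fetch the ACL of the object uploaded above and list its grants.
response = s3.get_object_acl(Bucket='my-bucket', Key='prefix/rows.csv')
for grant in response['Grants']:
    print(grant['Grantee'].get('Type'), grant['Permission'])
```

With `bucket-owner-full-control`, the bucket owner's canonical user should appear with `FULL_CONTROL`.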