Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Support ACL for S3 uploads (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
henriblancke authored Jul 23, 2020
1 parent 38e173a commit f912b9e
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 7 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
.vscode
.idea/*


# Python
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -30,3 +29,6 @@ tmp
# Docs
docs/_build/
docs/_templates/

# Environment
.env
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ Full list of options in `config.json`:
| aws_access_key_id | String | No | S3 Access Key Id. If not provided, AWS_ACCESS_KEY_ID environment variable or IAM role will be used |
| aws_secret_access_key | String | No | S3 Secret Access Key. If not provided, AWS_SECRET_ACCESS_KEY environment variable or IAM role will be used |
| aws_session_token | String | No | AWS Session token. If not provided, AWS_SESSION_TOKEN environment variable will be used |
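| s3_acl | String | No | (Default: None) S3 ACL name to set on the files uploaded to S3. If not provided, no ACL is sent with the upload. |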
| s3_bucket | String | Yes | S3 Bucket name |
| s3_key_prefix | String | | (Default: None) A static prefix before the generated S3 key names. Using prefixes you can upload files into specific directories in the S3 bucket. |
| s3_key_prefix | String | No | (Default: None) A static prefix before the generated S3 key names. Using prefixes you can upload files into specific directories in the S3 bucket. |
| stage | String | Yes | Named external stage name created at pre-requirements section. Has to be a fully qualified name including the schema name |
| file_format | String | Yes | Named file format name created at pre-requirements section. Has to be a fully qualified name including the schema name. |
| batch_size_rows | Integer | No | (Default: 100000) Maximum number of rows in each batch. At the end of each batch, the rows in the batch are loaded into Snowflake. |
Expand All @@ -158,7 +159,7 @@ Full list of options in `config.json`:

### To run tests:

1. Define environment variables that requires running the tests
1. Define the environment variables that are required to run the tests by creating a `.env` file in `tests/integration`, or by exporting the variables below.
```
export TARGET_SNOWFLAKE_ACCOUNT=<snowflake-account-name>
export TARGET_SNOWFLAKE_DBNAME=<snowflake-database-name>
Expand All @@ -168,6 +169,7 @@ Full list of options in `config.json`:
export TARGET_SNOWFLAKE_SCHEMA=<snowflake-schema>
export TARGET_SNOWFLAKE_AWS_ACCESS_KEY=<aws-access-key-id>
export TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY=<aws-access-secret-access-key>
export TARGET_SNOWFLAKE_S3_ACL=<s3-target-acl>
export TARGET_SNOWFLAKE_S3_BUCKET=<s3-external-bucket>
export TARGET_SNOWFLAKE_S3_KEY_PREFIX=<bucket-directory>
export TARGET_SNOWFLAKE_STAGE=<stage-object-with-schema-name>
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
"test": [
"nose==1.3.7",
"mock==3.0.5",
"pylint==2.4.2"
"pylint==2.4.2",
"python-dotenv==0.14.0"
]
},
entry_points="""
Expand Down
10 changes: 7 additions & 3 deletions target_snowflake/db_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ def put_to_stage(self, file, stream, count, temp_dir=None):

# Generating key in S3 bucket
bucket = self.connection_config['s3_bucket']
s3_acl = self.connection_config.get('s3_acl')
s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
s3_key = "{}pipelinewise_{}_{}.csv".format(s3_key_prefix, stream, datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

Expand All @@ -392,19 +393,22 @@ def put_to_stage(self, file, stream, count, temp_dir=None):
)

# Upload to s3
extra_args = {'ACL': s3_acl} if s3_acl else dict()

# Send key and iv in the metadata, that will be required to decrypt and upload the encrypted file
metadata = {
extra_args['Metadata'] = {
'x-amz-key': encryption_metadata.key,
'x-amz-iv': encryption_metadata.iv
}
self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs={'Metadata': metadata})
self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)

# Remove the uploaded encrypted file
os.remove(encrypted_file)

# Upload to S3 without encrypting
else:
self.s3.upload_file(file, bucket, s3_key)
extra_args = {'ACL': s3_acl} if s3_acl else None
self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

return s3_key

Expand Down
15 changes: 15 additions & 0 deletions tests/integration/.env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
TARGET_SNOWFLAKE_ACCOUNT=<snowflake-account-name>
TARGET_SNOWFLAKE_DBNAME=<snowflake-database-name>
TARGET_SNOWFLAKE_USER=<snowflake-user>
TARGET_SNOWFLAKE_PASSWORD=<snowflake-password>
TARGET_SNOWFLAKE_WAREHOUSE=<snowflake-warehouse>
TARGET_SNOWFLAKE_SCHEMA=<snowflake-schema>
TARGET_SNOWFLAKE_AWS_ACCESS_KEY=<aws-access-key-id>
TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY=<aws-access-secret-access-key>
TARGET_SNOWFLAKE_S3_ACL=<s3-target-acl>
TARGET_SNOWFLAKE_S3_BUCKET=<s3-external-bucket>
TARGET_SNOWFLAKE_S3_KEY_PREFIX=<bucket-directory>
TARGET_SNOWFLAKE_STAGE=<stage-object-with-schema-name>
TARGET_SNOWFLAKE_FILE_FORMAT=<file-format-object-with-schema-name>
CLIENT_SIDE_ENCRYPTION_MASTER_KEY=<client_side_encryption_master_key>
CLIENT_SIDE_ENCRYPTION_STAGE_OBJECT=<client_side_encryption_stage_object>
3 changes: 3 additions & 0 deletions tests/integration/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import json
from dotenv import load_dotenv

load_dotenv()

def get_db_config():
config = {}
Expand All @@ -26,6 +28,7 @@ def get_db_config():
config['aws_secret_access_key'] = os.environ.get('TARGET_SNOWFLAKE_AWS_SECRET_ACCESS_KEY')
config['s3_bucket'] = os.environ.get('TARGET_SNOWFLAKE_S3_BUCKET')
config['s3_key_prefix'] = os.environ.get('TARGET_SNOWFLAKE_S3_KEY_PREFIX')
config['s3_acl'] = os.environ.get('TARGET_SNOWFLAKE_S3_ACL')

# External stage in snowflake with client side encryption details
config['client_side_encryption_master_key'] = os.environ.get('CLIENT_SIDE_ENCRYPTION_MASTER_KEY')
Expand Down

0 comments on commit f912b9e

Please sign in to comment.