Review notes (text before the first "diff --git" header is ignored by git apply and patch).

  * The patch was stored with its newlines and indentation collapsed. One patch
    line per line has been restored and every hunk re-counted against its header.
    Indentation inside context and removed lines is reconstructed from
    conventions (terraform fmt alignment, YAML nesting, Makefile tabs) and is a
    best guess; if a hunk is rejected, `git apply --ignore-whitespace` may help.
  * terraform/moved.tf is new in this revision. The S3 resources move from the
    root module into module.s3_bucket; without `moved` blocks an existing
    deployment would plan a destroy/re-create of the settings bucket.
  * Path variable descriptions now state what the paths are actually resolved
    against: the s3-configs module directory (path.module), not the root module.
    The `index` header is omitted for the two files edited during review.

diff --git a/.github/workflows/run-integration-tests.yml b/.github/workflows/run-integration-tests.yml
index e38d3c2..a770036 100644
--- a/.github/workflows/run-integration-tests.yml
+++ b/.github/workflows/run-integration-tests.yml
@@ -22,12 +22,12 @@ jobs:
           terraform_version: 1.3.3
       - name: Terraform init
         run: |
-          cd ./examples/localstack
+          cd ./tests/integration_tests/infra
           terraform init
       - name: Terraform apply
         run: |
-          cd ./examples/localstack
-          terraform apply -target=module.integration_tests_data_qa.aws_s3_object.great_expectations_yml -target=module.integration_tests_data_qa.aws_s3_object.test_configs -target=module.integration_tests_data_qa.aws_s3_object.pipeline_config -target=module.integration_tests_data_qa.aws_s3_object.pks_config -target=module.integration_tests_data_qa.aws_s3_object.mapping_config -target=module.integration_tests_data_qa.aws_s3_object.expectations_store -target=module.integration_tests_data_qa.aws_s3_object.test_config_manifest -auto-approve
+          cd ./tests/integration_tests/infra
+          terraform apply -auto-approve
       - name: check localstack
         run: |
           curl http://localhost:4566/_localstack/health -i
@@ -40,4 +40,4 @@ jobs:
           cd ./tests/integration_tests/test_data_tests
           docker build -t integration-tests:latest .
       - name: Run tests
-        run: docker run --env BUCKET=integration-test-bucket --env S3_HOST=172.17.0.1 --env S3_PORT=4566 integration-tests
+        run: docker run --env BUCKET=dqg-settings-local --env S3_HOST=172.17.0.1 --env S3_PORT=4566 integration-tests
diff --git a/Makefile b/Makefile
index bfe1084..fdc5b37 100644
--- a/Makefile
+++ b/Makefile
@@ -2,9 +2,9 @@ run-localstack:
 	docker run --rm -d -p 4566:4566 -p 4510-4559:4510-4559 localstack/localstack:1.3.1
 
 deploy-qa-infra:
-	cd ./examples/localstack && \
+	cd ./tests/integration_tests/infra && \
 	terraform init && \
-	terraform apply -target=module.integration_tests_data_qa.aws_s3_object.great_expectations_yml -target=module.integration_tests_data_qa.aws_s3_object.test_configs -target=module.integration_tests_data_qa.aws_s3_object.pipeline_config -target=module.integration_tests_data_qa.aws_s3_object.pks_config -target=module.integration_tests_data_qa.aws_s3_object.mapping_config -target=module.integration_tests_data_qa.aws_s3_object.expectations_store -target=module.integration_tests_data_qa.aws_s3_object.test_config_manifest -auto-approve
+	terraform apply -auto-approve
 
 build-data-test-img:
 	cd ./functions/data_test && \
@@ -23,7 +23,7 @@ build-unit-tests-img: build-data-test-img
 
 host := host.docker.internal
 port:= 4566
-qa_bucket = integration-test-bucket
+qa_bucket = dqg-settings-local
 
 run-integration-tests: build-data-test-img build-data-test-tests-img
 	cd $(integration_tests_dir)
diff --git a/examples/localstack/README.md b/examples/localstack/README.md
deleted file mode 100644
index 23e4748..0000000
--- a/examples/localstack/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-0. Install terraform
-1. Navigate to `./examples/localstack`
-2. Run localstack `docker run --rm -it -p 4566:4566 -p 4510-4559:4510-4559 localstack/localstack:1.3.1`
-3. Run `terraform init`
-4. Setup S3 environment `terraform apply -target=module.integration_tests_data_qa.aws_s3_object.great_expectations_yml -target=module.integration_tests_data_qa.aws_s3_object.test_configs -target=module.integration_tests_data_qa.aws_s3_object.pipeline_config -target=module.integration_tests_data_qa.aws_s3_object.pks_config -target=module.integration_tests_data_qa.aws_s3_object.mapping_config -target=module.integration_tests_data_qa.aws_s3_object.expectations_store -target=module.integration_tests_data_qa.aws_s3_object.test_config_manifest -auto-approve`
\ No newline at end of file
diff --git a/examples/localstack/main.tf b/examples/localstack/main.tf
deleted file mode 100644
index 64cd57d..0000000
--- a/examples/localstack/main.tf
+++ /dev/null
@@ -1,31 +0,0 @@
-provider "aws" {
-  region              = "us-west-2"
-  access_key          = "local-access-key"
-  secret_key          = "local-secret-key"
-  s3_force_path_style = true
-
-  endpoints {
-    s3  = "http://localhost:4566"
-    sts = "http://localhost:4566"
-  }
-}
-
-module "integration_tests_data_qa" {
-  source = "../../terraform"
-
-  data_test_storage_bucket_name = "integration-test-bucket"
-  environment                   = "local"
-
-  allure_report_image_uri = ""
-  data_test_image_uri     = ""
-  push_report_image_uri   = ""
-
-  reports_subnet_id = ""
-  reports_vpc_id    = ""
-
-  lambda_private_subnet_ids = []
-  lambda_security_group_ids = []
-
-  reports_whitelist_ips = []
-}
-
diff --git a/terraform/iam.tf b/terraform/iam.tf
index 70b9490..cdc85a4 100644
--- a/terraform/iam.tf
+++ b/terraform/iam.tf
@@ -17,8 +17,8 @@ resource "aws_iam_policy" "basic_lambda_policy" {
           "s3:*"
         ],
         "Resource" : [
-          "arn:aws:s3:::${aws_s3_bucket.settings_bucket.bucket}",
-          "arn:aws:s3:::${aws_s3_bucket.settings_bucket.bucket}/*",
+          "arn:aws:s3:::${module.s3_bucket.bucket_name}",
+          "arn:aws:s3:::${module.s3_bucket.bucket_name}/*",
         ]
       },
       {
diff --git a/terraform/lambda_allure_report.tf b/terraform/lambda_allure_report.tf
index d11e0b0..eff7efb 100644
--- a/terraform/lambda_allure_report.tf
+++ b/terraform/lambda_allure_report.tf
@@ -10,7 +10,7 @@ module "lambda_allure_report" {
 
   environment_variables = merge({
     ENVIRONMENT    = var.environment
-    BUCKET         = aws_s3_bucket.settings_bucket.bucket
+    BUCKET         = module.s3_bucket.bucket_name
     REPORTS_WEB    = module.reports_gateway.s3_gateway_address
     DYNAMODB_TABLE = aws_dynamodb_table.data_qa_report.name
   }, var.allure_report_extra_vars)
diff --git a/terraform/lambda_data_test.tf b/terraform/lambda_data_test.tf
index 4084b7c..255e348 100644
--- a/terraform/lambda_data_test.tf
+++ b/terraform/lambda_data_test.tf
@@ -10,7 +10,7 @@ module "lambda_data_test" {
 
   environment_variables = merge({
     ENVIRONMENT    = var.environment
-    BUCKET         = aws_s3_bucket.settings_bucket.bucket
+    BUCKET         = module.s3_bucket.bucket_name
     REPORTS_WEB    = module.reports_gateway.s3_gateway_address
     DYNAMODB_TABLE = aws_dynamodb_table.data_qa_report.name
     REDSHIFT_DB    = var.redshift_db_name
diff --git a/terraform/lambda_push_report.tf b/terraform/lambda_push_report.tf
index adf8603..05336e4 100644
--- a/terraform/lambda_push_report.tf
+++ b/terraform/lambda_push_report.tf
@@ -1,7 +1,7 @@
 locals {
   default_push_report_env_vars = merge({
     ENVIRONMENT    = var.environment
-    BUCKET         = aws_s3_bucket.settings_bucket.bucket
+    BUCKET         = module.s3_bucket.bucket_name
     REPORTS_WEB    = module.reports_gateway.s3_gateway_address
     DYNAMODB_TABLE = aws_dynamodb_table.data_qa_report.name
     JIRA_URL       = var.lambda_push_jira_url
diff --git a/terraform/modules.tf b/terraform/modules.tf
index 9f1938d..41ac76a 100644
--- a/terraform/modules.tf
+++ b/terraform/modules.tf
@@ -6,6 +6,21 @@ module "athena-connector" {
   data_catalog_name = "dqg-dynamodb-connector-${var.environment}"
 }
 
+module "s3_bucket" {
+  source      = "./modules/s3-configs"
+  environment = var.environment
+
+  data_test_storage_bucket_name = var.data_test_storage_bucket_name
+  test_coverage_path            = var.test_coverage_path
+  pipeline_config_path          = var.pipeline_config_path
+  pks_path                      = var.pks_path
+  sort_keys_path                = var.sort_keys_path
+  mapping_path                  = var.mapping_path
+  expectations_store            = var.expectations_store
+  manifest_path                 = var.manifest_path
+  great_expectation_path        = var.great_expectation_path
+}
+
 module "basic_slack_alerting" {
   count  = var.basic_alert_notification_settings == null ? 0 : 1
   source = "./modules/alerting"
@@ -22,9 +37,11 @@ module "basic_slack_alerting" {
 }
 
 module "reports_gateway" {
-  source             = "./modules/s3-gateway"
+  source     = "./modules/s3-gateway"
+  depends_on = [module.s3_bucket]
+
   env                = var.environment
-  bucket_name        = aws_s3_bucket.settings_bucket.bucket
+  bucket_name        = module.s3_bucket.bucket_name
   vpc_id             = var.reports_vpc_id
   instance_subnet_id = var.reports_subnet_id
 
diff --git a/terraform/s3.tf b/terraform/modules/s3-configs/main.tf
similarity index 93%
rename from terraform/s3.tf
rename to terraform/modules/s3-configs/main.tf
index feb42c0..76e28f1 100644
--- a/terraform/s3.tf
+++ b/terraform/modules/s3-configs/main.tf
@@ -21,11 +21,11 @@ resource "aws_s3_bucket_versioning" "fast-data-qa-bucket" {
 resource "aws_s3_object" "great_expectations_yml" {
   bucket       = aws_s3_bucket.settings_bucket.bucket
   content_type = "application/x-yaml"
-  content      = templatefile("${path.module}/../templates/great_expectations.yml", {
+  content      = templatefile("${path.module}/${var.great_expectation_path}", {
     bucket = aws_s3_bucket.settings_bucket.bucket
   })
   key  = "${aws_s3_bucket.settings_bucket.bucket}/great_expectations/great_expectations.yml"
-  etag = md5(templatefile("${path.module}/../templates/great_expectations.yml", {
+  etag = md5(templatefile("${path.module}/${var.great_expectation_path}", {
     bucket = aws_s3_bucket.settings_bucket.bucket
   }))
 }
@@ -76,12 +76,12 @@ resource "aws_s3_object" "expectations_store" {
 
 resource "aws_s3_object" "test_config_manifest" {
   bucket = aws_s3_bucket.settings_bucket.bucket
-  etag = md5(templatefile("${path.module}/../configs/manifest.json", {
+  etag = md5(templatefile("${path.module}/${var.manifest_path}", {
     env_name    = var.environment,
     bucket_name = aws_s3_bucket.settings_bucket.bucket
   }))
   content_type = "application/json"
-  content = templatefile("${path.module}/../configs/manifest.json",
+  content = templatefile("${path.module}/${var.manifest_path}",
     {
       env_name    = var.environment,
       bucket_name = aws_s3_bucket.settings_bucket.bucket
@@ -165,4 +165,4 @@ resource "aws_s3_bucket_lifecycle_configuration" "delete_old_reports" {
     status = "Enabled"
     id     = "great_expectations_uncommitted"
   }
-}
\ No newline at end of file
+}
diff --git a/terraform/modules/s3-configs/outputs.tf b/terraform/modules/s3-configs/outputs.tf
new file mode 100644
index 0000000..e6caf19
--- /dev/null
+++ b/terraform/modules/s3-configs/outputs.tf
@@ -0,0 +1,3 @@
+output "bucket_name" {
+  value = aws_s3_bucket.settings_bucket.bucket
+}
\ No newline at end of file
diff --git a/terraform/modules/s3-configs/variables.tf b/terraform/modules/s3-configs/variables.tf
new file mode 100644
--- /dev/null
+++ b/terraform/modules/s3-configs/variables.tf
@@ -0,0 +1,49 @@
+variable "environment" {
+  description = "Environment name used to build fully qualified tags and resource's names"
+  type        = string
+}
+
+variable "data_test_storage_bucket_name" {
+  description = "Bucket name which will be used to store data tests and settings for its execution"
+  type        = string
+}
+
+variable "test_coverage_path" {
+  description = "Path to the tests description file, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "pipeline_config_path" {
+  description = "Path to the pipeline description file, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "pks_path" {
+  description = "Path to the primary keys description file, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "sort_keys_path" {
+  description = "Path to the sort keys description file, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "mapping_path" {
+  description = "Path to the mapping description file, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "expectations_store" {
+  description = "Path to the expectations_store directory, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "manifest_path" {
+  description = "Path to the manifest template, relative to this module's directory (path.module)"
+  type        = string
+}
+
+variable "great_expectation_path" {
+  description = "Path to the great expectations yaml template, relative to this module's directory (path.module)"
+  type        = string
+}
diff --git a/terraform/moved.tf b/terraform/moved.tf
new file mode 100644
--- /dev/null
+++ b/terraform/moved.tf
@@ -0,0 +1,56 @@
+# The S3 bucket and its objects moved from the root module (former s3.tf)
+# into ./modules/s3-configs. These blocks let Terraform re-address existing
+# state instead of planning a destroy/re-create of the settings bucket.
+# Requires Terraform >= 1.1.
+# NOTE(review): covers only the resources visible in this change; add a
+# block for anything else that was declared in the former s3.tf.
+
+moved {
+  from = aws_s3_bucket.settings_bucket
+  to   = module.s3_bucket.aws_s3_bucket.settings_bucket
+}
+
+moved {
+  from = aws_s3_bucket_versioning.fast-data-qa-bucket
+  to   = module.s3_bucket.aws_s3_bucket_versioning.fast-data-qa-bucket
+}
+
+moved {
+  from = aws_s3_object.great_expectations_yml
+  to   = module.s3_bucket.aws_s3_object.great_expectations_yml
+}
+
+moved {
+  from = aws_s3_object.test_configs
+  to   = module.s3_bucket.aws_s3_object.test_configs
+}
+
+moved {
+  from = aws_s3_object.pipeline_config
+  to   = module.s3_bucket.aws_s3_object.pipeline_config
+}
+
+moved {
+  from = aws_s3_object.pks_config
+  to   = module.s3_bucket.aws_s3_object.pks_config
+}
+
+moved {
+  from = aws_s3_object.mapping_config
+  to   = module.s3_bucket.aws_s3_object.mapping_config
+}
+
+moved {
+  from = aws_s3_object.expectations_store
+  to   = module.s3_bucket.aws_s3_object.expectations_store
+}
+
+moved {
+  from = aws_s3_object.test_config_manifest
+  to   = module.s3_bucket.aws_s3_object.test_config_manifest
+}
+
+moved {
+  from = aws_s3_bucket_lifecycle_configuration.delete_old_reports
+  to   = module.s3_bucket.aws_s3_bucket_lifecycle_configuration.delete_old_reports
+}
diff --git a/terraform/outputs.tf b/terraform/outputs.tf
index caf00aa..239ae85 100644
--- a/terraform/outputs.tf
+++ b/terraform/outputs.tf
@@ -28,5 +28,5 @@ output "report_push_role_arn" {
 
 output "bucket" {
   description = "Data quality gate bucket with settings and generated tests"
-  value       = aws_s3_bucket.settings_bucket.bucket
+  value       = module.s3_bucket.bucket_name
 }
\ No newline at end of file
diff --git a/terraform/variables.tf b/terraform/variables.tf
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -14,7 +14,6 @@ variable "project" {
 variable "environment" {
   description = "Environment name used to build fully qualified tags and resource's names"
   type        = string
-  default     = "data-qa-dev"
 }
 
 variable "data_test_storage_bucket_name" {
@@ -25,37 +24,49 @@ variable "data_test_storage_bucket_name" {
 variable "test_coverage_path" {
-  description = "Path to the tests description path, relative to the root TF"
+  description = "Path to the tests description file, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../configs/test_coverage.json"
+  default     = "../../../configs/test_coverage.json"
 }
 
 variable "pipeline_config_path" {
-  description = "Path to the pipeline description path, relative to the root TF"
+  description = "Path to the pipeline description file, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../configs/pipeline.json"
+  default     = "../../../configs/pipeline.json"
 }
 
 variable "pks_path" {
-  description = "Path to the primary keys description path, relative to the root TF"
+  description = "Path to the primary keys description file, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../configs/pks.json"
+  default     = "../../../configs/pks.json"
 }
 
 variable "sort_keys_path" {
-  description = "Path to the sort keys description path, relative to the root TF"
+  description = "Path to the sort keys description file, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../configs/sort_keys.json"
+  default     = "../../../configs/sort_keys.json"
 }
 
 variable "mapping_path" {
-  description = "Path to the mapping description path, relative to the root TF"
+  description = "Path to the mapping description file, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../configs/mapping.json"
+  default     = "../../../configs/mapping.json"
 }
 
 variable "expectations_store" {
-  description = "Path to the expectations_store directory, relative to the root TF"
+  description = "Path to the expectations_store directory, relative to the s3-configs module directory (./modules/s3-configs)"
   type        = string
-  default     = "../expectations_store"
+  default     = "../../../expectations_store"
+}
+
+variable "manifest_path" {
+  description = "Path to the manifest template, relative to the s3-configs module directory (./modules/s3-configs)"
+  type        = string
+  default     = "../../../configs/manifest.json"
+}
+
+variable "great_expectation_path" {
+  description = "Path to the great expectations yaml template, relative to the s3-configs module directory (./modules/s3-configs)"
+  type        = string
+  default     = "../../../templates/great_expectations.yml"
 }
 
 variable "lambda_allure_report_memory" {
diff --git a/tests/integration_tests/test_data_tests/README.md b/tests/integration_tests/README.md
similarity index 100%
rename from tests/integration_tests/test_data_tests/README.md
rename to tests/integration_tests/README.md
diff --git a/examples/localstack/.gitignore b/tests/integration_tests/infra/.gitignore
similarity index 100%
rename from examples/localstack/.gitignore
rename to tests/integration_tests/infra/.gitignore
diff --git a/tests/integration_tests/infra/README.md b/tests/integration_tests/infra/README.md
new file mode 100644
index 0000000..5127af1
--- /dev/null
+++ b/tests/integration_tests/infra/README.md
@@ -0,0 +1,5 @@
+0. Install terraform
+1. Navigate to `./tests/integration_tests/infra`
+2. Run localstack `docker run --rm -it -p 4566:4566 -p 4510-4559:4510-4559 localstack/localstack:1.3.1`
+3. Run `terraform init`
+4. Setup S3 environment `terraform apply -auto-approve`
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/manifest.json b/tests/integration_tests/infra/configs/manifest.json
new file mode 100644
index 0000000..90e9d81
--- /dev/null
+++ b/tests/integration_tests/infra/configs/manifest.json
@@ -0,0 +1,12 @@
+{
+  "fileLocations":[
+    {
+      "URIPrefixes":[
+        "s3://${bucket_name}/test_configs/test_coverage.json"
+      ]
+    }
+  ],
+  "globalUploadSettings":{
+    "format":"JSON"
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/mapping.json b/tests/integration_tests/infra/configs/mapping.json
new file mode 100644
index 0000000..51f7a20
--- /dev/null
+++ b/tests/integration_tests/infra/configs/mapping.json
@@ -0,0 +1,6 @@
+{
+  "sample": {
+    "CALL ID": "call id",
+    "new_col_added0": "year_month"
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/pipeline.json b/tests/integration_tests/infra/configs/pipeline.json
new file mode 100644
index 0000000..934e5bf
--- /dev/null
+++ b/tests/integration_tests/infra/configs/pipeline.json
@@ -0,0 +1,12 @@
+{
+  "run_name":
+  {
+    "engine": "s3",
+    "old_suite_name": "planner_raw_softfail",
+    "reuse_suite": false,
+    "use_old_suite_only": false,
+    "autobug": false,
+    "only_failed": true
+  }
+
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/pks.json b/tests/integration_tests/infra/configs/pks.json
new file mode 100644
index 0000000..e192edf
--- /dev/null
+++ b/tests/integration_tests/infra/configs/pks.json
@@ -0,0 +1,9 @@
+{
+  "db.table": {
+    "primaryKey": [
+      "ACCOUNT_TYPE",
+      "ACCOUNT_NUMBER",
+      "RECORD_NUMBER"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/sort_keys.json b/tests/integration_tests/infra/configs/sort_keys.json
new file mode 100644
index 0000000..3beab44
--- /dev/null
+++ b/tests/integration_tests/infra/configs/sort_keys.json
@@ -0,0 +1,7 @@
+{
+  "db.table": {
+    "sortKey": [
+      "update_dt"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/configs/test_coverage.json b/tests/integration_tests/infra/configs/test_coverage.json
new file mode 100644
index 0000000..294cd41
--- /dev/null
+++ b/tests/integration_tests/infra/configs/test_coverage.json
@@ -0,0 +1,5 @@
+{
+  "sample": {
+    "complexSuite": true
+  }
+}
\ No newline at end of file
diff --git a/tests/integration_tests/infra/main.tf b/tests/integration_tests/infra/main.tf
new file mode 100644
index 0000000..5835d9a
--- /dev/null
+++ b/tests/integration_tests/infra/main.tf
@@ -0,0 +1,26 @@
+provider "aws" {
+  region              = "us-west-2"
+  access_key          = "local-access-key"
+  secret_key          = "local-secret-key"
+  s3_force_path_style = true
+
+  endpoints {
+    s3  = "http://localhost:4566"
+    sts = "http://localhost:4566"
+  }
+}
+
+module "s3_configs" {
+  source                        = "../../../terraform/modules/s3-configs"
+  environment                   = "local"
+  data_test_storage_bucket_name = "dqg-settings-local"
+
+  test_coverage_path     = "../../../tests/integration_tests/infra/configs/test_coverage.json"
+  pipeline_config_path   = "../../../tests/integration_tests/infra/configs/pipeline.json"
+  pks_path               = "../../../tests/integration_tests/infra/configs/pks.json"
+  sort_keys_path         = "../../../tests/integration_tests/infra/configs/sort_keys.json"
+  mapping_path           = "../../../tests/integration_tests/infra/configs/mapping.json"
+  manifest_path          = "../../../tests/integration_tests/infra/configs/manifest.json"
+  great_expectation_path = "../../../tests/integration_tests/infra/templates/great_expectations.yml"
+  expectations_store     = "../../../tests/integration_tests/infra/expectations_store"
+}
diff --git a/tests/integration_tests/infra/templates/great_expectations.yml b/tests/integration_tests/infra/templates/great_expectations.yml
new file mode 100644
index 0000000..18d6024
--- /dev/null
+++ b/tests/integration_tests/infra/templates/great_expectations.yml
@@ -0,0 +1,73 @@
+config_version: 2.0
+
+datasources:
+  pandas_s3:
+    class_name: PandasDatasource
+    batch_kwargs_generators:
+      pandas_s3_generator:
+        class_name: S3GlobReaderBatchKwargsGenerator
+        bucket: ${bucket}
+        assets:
+          your_first_data_asset_name:
+            prefix: data/ # trailing slash is important
+            regex_filter: .* # The regex filter will filter the results returned by S3 for the key and prefix to only those matching the regex
+    module_name: great_expectations.datasource
+    data_asset_type:
+      class_name: PandasDataset
+      module_name: great_expectations.dataset
+
+config_variables_file_path: great_expectations/uncommitted/config_variables.yml
+
+plugins_directory: great_expectations/plugins/
+
+validation_operators:
+  action_list_operator:
+    class_name: ActionListValidationOperator
+    action_list:
+      - name: store_validation_result
+        action:
+          class_name: StoreValidationResultAction
+      - name: store_evaluation_params
+        action:
+          class_name: StoreEvaluationParametersAction
+      - name: update_data_docs
+        action:
+          class_name: UpdateDataDocsAction
+
+stores:
+  expectations_S3_store:
+    class_name: ExpectationsStore
+    store_backend:
+      class_name: TupleS3StoreBackend
+      bucket: '${bucket}'
+      prefix: '${bucket}/great_expectations/expectations/'
+
+  validations_S3_store:
+    class_name: ValidationsStore
+    store_backend:
+      class_name: TupleS3StoreBackend
+      bucket: '${bucket}'
+      prefix: '${bucket}/great_expectations/uncommitted/validations/'
+
+  evaluation_parameter_store:
+    class_name: EvaluationParameterStore
+
+expectations_store_name: expectations_S3_store
+validations_store_name: validations_S3_store
+evaluation_parameter_store_name: evaluation_parameter_store
+
+data_docs_sites:
+  s3_site:
+    class_name: SiteBuilder
+    show_how_to_buttons: false
+    store_backend:
+      class_name: TupleS3StoreBackend
+      bucket: ${bucket}
+      prefix: "data_docs/"
+    site_index_builder:
+      class_name: DefaultSiteIndexBuilder
+
+anonymous_usage_statistics:
+  data_context_id: f6a15c13-c249-416b-8beb-2e540a245773
+  enabled: true
+notebooks:
diff --git a/examples/localstack/versions.tf b/tests/integration_tests/infra/versions.tf
similarity index 98%
rename from examples/localstack/versions.tf
rename to tests/integration_tests/infra/versions.tf
index ae129d9..fc5a627 100644
--- a/examples/localstack/versions.tf
+++ b/tests/integration_tests/infra/versions.tf
@@ -7,4 +7,4 @@ terraform {
       version = "= 4.67.0"
     }
   }
-}
\ No newline at end of file
+}