diff --git a/.gitignore b/.gitignore index 1ef10d2..757da83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ .aws-sam +.python-version samconfig.toml data/ auth.json metadata.json +*.png # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab59ab4..f3b13f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,4 +9,4 @@ repos: # supported by your project here, or alternatively use # pre-commit's default_language_version, see # https://pre-commit.com/#top_level-default_language_version - language_version: python3.11 \ No newline at end of file + language_version: python3.9 diff --git a/events/event-dashboard-chart-request.json b/events/event-dashboard-chart-request.json deleted file mode 100644 index 7f64ee1..0000000 --- a/events/event-dashboard-chart-request.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "resource": "/poc/{subscription_id}", - "path": "/poc/106", - "httpMethod": "GET", - "headers": { - "Accept": "*/*", - "Accept-Encoding": "gzip, deflate, br", - "CloudFront-Forwarded-Proto": "https", - "CloudFront-Is-Desktop-Viewer": "true", - "CloudFront-Is-Mobile-Viewer": "false", - "CloudFront-Is-SmartTV-Viewer": "false", - "CloudFront-Is-Tablet-Viewer": "false", - "CloudFront-Viewer-ASN": "40127", - "CloudFront-Viewer-Country": "US", - "Host": "vnkfm60p0c.execute-api.us-east-1.amazonaws.com", - "Postman-Token": "54b34c48-90bc-4b65-9263-3697df3fa9b9", - "User-Agent": "PostmanRuntime/7.30.1", - "Via": "1.1 405b8ed0c1df92e14644e6db88a3af5a.cloudfront.net (CloudFront)", - "X-Amz-Cf-Id": "HgPndrLdHrunAoX3ZAG1KRZwjiYh-opniHaA0JBnoOQhmmcPAujdcw==", - "x-amz-date": "20230207T144804Z", - "X-Amz-Security-Token": "FwoGZXIvYXdzEDUaDBjGb+pyZw9caRguUSLxAS1GRd64gpBoHhS8TpHCC1lWZezzzzsMFIdoUxhLU4A5NYXS+c35U1fUqC6uJGNfvLFiPjcHf9vytHiuVDwMH5omfMO6n6LeWmNLMkLYgingdkmiAueVVO7G7og2VKA3+fVYf+9GPf4Odw/C89k5C+fDJXJLYOncqxV0xxc/9SyruTBt5fYZbz/2/+LuGmHDlb+8/8w4kJfpmnvIp0TaAA21B5CnV1n3IAZc27yRmSDrfbquH70E+VHrKAod9bukdsQiHZRo1qoWWLBVE4jfDiLRrzNzMgDyOK7YBqN//kjqdBLva2HWlsiLR9+IiS+nAu8oyN7xnQYyMlJUGImUNFjhQePDiSEPdnXx4Jp83bDyO3S8okQa0+9qpDoTMpmkYqrB69j5boWg5Gk+", - "X-Amzn-Trace-Id": "Root=1-63e264a5-12016be54cfd8ff21d2350a3", - "X-Forwarded-For": "134.174.21.171, 15.158.52.7", - "X-Forwarded-Port": "443", - "X-Forwarded-Proto": "https" - }, - "multiValueHeaders": { - "Accept": [ - "*/*" - ], - "Accept-Encoding": [ - "gzip, deflate, br" - ], - "CloudFront-Forwarded-Proto": [ - "https" - ], - "CloudFront-Is-Desktop-Viewer": [ - "true" - ], - "CloudFront-Is-Mobile-Viewer": [ - "false" - ], - "CloudFront-Is-SmartTV-Viewer": [ - "false" - ], - "CloudFront-Is-Tablet-Viewer": [ - "false" - ], - "CloudFront-Viewer-ASN": [ - "40127" - ], - "CloudFront-Viewer-Country": [ - "US" - ], - "Host": [ - "vnkfm60p0c.execute-api.us-east-1.amazonaws.com" - ], - "Postman-Token": [ - "54b34c48-90bc-4b65-9263-3697df3fa9b9" - ], - "User-Agent": [ - "PostmanRuntime/7.30.1" - ], - "Via": [ - "1.1 405b8ed0c1df92e14644e6db88a3af5a.cloudfront.net (CloudFront)" - ], - "X-Amz-Cf-Id": [ - "HgPndrLdHrunAoX3ZAG1KRZwjiYh-opniHaA0JBnoOQhmmcPAujdcw==" - ], - "x-amz-date": [ - "20230207T144804Z" - ], - "X-Amz-Security-Token": [ - "FwoGZXIvYXdzEDUaDBjGb+pyZw9caRguUSLxAS1GRd64gpBoHhS8TpHCC1lWZezzzzsMFIdoUxhLU4A5NYXS+c35U1fUqC6uJGNfvLFiPjcHf9vytHiuVDwMH5omfMO6n6LeWmNLMkLYgingdkmiAueVVO7G7og2VKA3+fVYf+9GPf4Odw/C89k5C+fDJXJLYOncqxV0xxc/9SyruTBt5fYZbz/2/+LuGmHDlb+8/8w4kJfpmnvIp0TaAA21B5CnV1n3IAZc27yRmSDrfbquH70E+VHrKAod9bukdsQiHZRo1qoWWLBVE4jfDiLRrzNzMgDyOK7YBqN//kjqdBLva2HWlsiLR9+IiS+nAu8oyN7xnQYyMlJUGImUNFjhQePDiSEPdnXx4Jp83bDyO3S8okQa0+9qpDoTMpmkYqrB69j5boWg5Gk+" - ], - "X-Amzn-Trace-Id": [ - "Root=1-63e264a5-12016be54cfd8ff21d2350a3" - ], - "X-Forwarded-For": [ - "134.174.21.171, 15.158.52.7" - ], - "X-Forwarded-Port": [ - "443" - ], - "X-Forwarded-Proto": [ - "https" - ] - }, - "queryStringParameters": { - "column": "age_group", - "filter": "age_group:lte:10", - "stratifier": "covid_pcr_result" - }, - "multiValueQueryStringParameters": { - "column": [ - "age_group" - ], - "filter": [ - "age_group:lte:10" - ], - "stratifier": [ - "covid_pcr_result" - ] - }, - "pathParameters": { - "subscription_id": "106" - }, - "stageVariables": "None", - "requestContext": { - "resourceId": "qqgokr", - "resourcePath": "/poc/{subscription_id}", - "httpMethod": "GET", - "extendedRequestId": "f-Sp0ExFoAMFcBw=", - "requestTime": "07/Feb/2023:14:48:05 +0000", - "path": "/Prod/poc/106", - "accountId": "316333106222", - "protocol": "HTTP/1.1", - "stage": "Prod", - "domainPrefix": "vnkfm60p0c", - "requestTimeEpoch": 1675781285028, - "requestId": "06509c3d-d51a-4291-bf67-de7c7270a4d8", - "identity": { - "cognitoIdentityPoolId": "None", - "accountId": "None", - "cognitoIdentityId": "None", - "caller": "None", - "sourceIp": "134.174.21.171", - "principalOrgId": "None", - "accessKey": "None", - "cognitoAuthenticationType": "None", - "cognitoAuthenticationProvider": "None", - "userArn": "None", - "userAgent": "PostmanRuntime/7.30.1", - "user": "None" - }, - "domainName": "vnkfm60p0c.execute-api.us-east-1.amazonaws.com", - "apiId": "vnkfm60p0c" - }, - "body": "None", - "isBase64Encoded": false -} \ No newline at end of file diff --git a/events/event-get-chart-data.json b/events/event-get-chart-data.json new file mode 100644 index 0000000..401375b --- /dev/null +++ b/events/event-get-chart-data.json @@ -0,0 +1,125 @@ +{ + "resource": "/chart_data/{subscription_name}", + "path": "/chart_data/covid__encounter", + "httpMethod": "GET", + "headers": { + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate, br", + "CloudFront-Forwarded-Proto": "https", + "CloudFront-Is-Desktop-Viewer": "true", + "CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Is-Tablet-Viewer": "false", + "CloudFront-Viewer-ASN": "40127", + "CloudFront-Viewer-Country": "US", + "Host": "effmuaxft2.execute-api.us-east-1.amazonaws.com", + "Postman-Token": "10f731c3-bb8d-4720-971c-7fd5e7cf0065", + "User-Agent": "PostmanRuntime/7.31.3", + "Via": "1.1 3ae9464b3a12f9a00e97e3c81ee98466.cloudfront.net (CloudFront)", + "X-Amz-Cf-Id": "JWyqc8QN24nqpUdGuej_gc35vpExxV017XJD8EXxM8x1OupUzCGYbg==", + "X-Amzn-Trace-Id": "Root=1-641c7f2c-5c612f3e389094297688efa0", + "X-Forwarded-For": "134.174.21.156, 70.132.21.75", + "X-Forwarded-Port": "443", + "X-Forwarded-Proto": "https" + }, + "multiValueHeaders": { + "Accept": [ + "*/*" + ], + "Accept-Encoding": [ + "gzip, deflate, br" + ], + "CloudFront-Forwarded-Proto": [ + "https" + ], + "CloudFront-Is-Desktop-Viewer": [ + "true" + ], + "CloudFront-Is-Mobile-Viewer": [ + "false" + ], + "CloudFront-Is-SmartTV-Viewer": [ + "false" + ], + "CloudFront-Is-Tablet-Viewer": [ + "false" + ], + "CloudFront-Viewer-ASN": [ + "40127" + ], + "CloudFront-Viewer-Country": [ + "US" + ], + "Host": [ + "effmuaxft2.execute-api.us-east-1.amazonaws.com" + ], + "Postman-Token": [ + "10f731c3-bb8d-4720-971c-7fd5e7cf0065" + ], + "User-Agent": [ + "PostmanRuntime/7.31.3" + ], + "Via": [ + "1.1 3ae9464b3a12f9a00e97e3c81ee98466.cloudfront.net (CloudFront)" + ], + "X-Amz-Cf-Id": [ + "JWyqc8QN24nqpUdGuej_gc35vpExxV017XJD8EXxM8x1OupUzCGYbg==" + ], + "X-Amzn-Trace-Id": [ + "Root=1-641c7f2c-5c612f3e389094297688efa0" + ], + "X-Forwarded-For": [ + "134.174.21.156, 70.132.21.75" + ], + "X-Forwarded-Port": [ + "443" + ], + "X-Forwarded-Proto": [ + "https" + ] + }, + "queryStringParameters": { + "column": "gender" + }, + "multiValueQueryStringParameters": { + "column": [ + "gender" + ] + }, + "pathParameters": { + "subscription_name": "covid__encounter" + }, + "stageVariables": "None", + "requestContext": { + "resourceId": "ta1ris", + "resourcePath": "/chart_data/{subscription_name}", + "httpMethod": "GET", + "extendedRequestId": "CPjPEHgQoAMFZ8A=", + "requestTime": "23/Mar/2023:16:32:44 +0000", + "path": "/dev/chart_data/covid__encounter", + "accountId": "316333106222", + "protocol": "HTTP/1.1", + "stage": "dev", + "domainPrefix": "effmuaxft2", + "requestTimeEpoch": 1679589164970, + "requestId": "ec3e7961-c978-45b5-af6b-46b70bcd9d7c", + "identity": { + "cognitoIdentityPoolId": "None", + "accountId": "None", + "cognitoIdentityId": "None", + "caller": "None", + "sourceIp": "134.174.21.156", + "principalOrgId": "None", + "accessKey": "None", + "cognitoAuthenticationType": "None", + "cognitoAuthenticationProvider": "None", + "userArn": "None", + "userAgent": "PostmanRuntime/7.31.3", + "user": "None" + }, + "domainName": "effmuaxft2.execute-api.us-east-1.amazonaws.com", + "apiId": "effmuaxft2" + }, + "body": "None", + "isBase64Encoded": false +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..db44f85 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[project] +name = "aggregator" +version = "0.1.0" +# This project is designed to run on the AWS serverless application framework (SAM). +# The project dependencies are handled via AWS layers. These are only required for +# local development. +dependencies= [ + "awswrangler", + "boto3", + "pandas" +] +authors = [ + { name="Matt Garber", email="matthew.garber@childrens.harvard.edu" }, +] +description = "Aggregates data from distributed hopsitals for display in Cumulus dashboard" +readme = "README.md" +license = { text="Apache License 2.0" } +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[project.urls] +"Homepage" = "https://github.com/smart-on-fhir/cumulus-aggregator" + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools ~=63.2.0", + "wheel ~=0.37.1", +] + +[project.optional-dependencies] +test = [ + # TODO: We are investigating some new features around athena mocks, + # which currently only exist in 4.1.5 and are broken in 4.1.6dev. Swing back + # around on this as the functionality matures. + "moto[s3,athena] == 4.1.5", + "pytest", + "pytest-mock" +] +dev = [ + "bandit", + "black", + "pre-commit", + "pylint", + "pycodestyle" + +] diff --git a/scripts/__pycache__/presigned_post_test.cpython-310-pytest-7.2.0.pyc b/scripts/__pycache__/presigned_post_test.cpython-310-pytest-7.2.0.pyc new file mode 100644 index 0000000..f1cd12b Binary files /dev/null and b/scripts/__pycache__/presigned_post_test.cpython-310-pytest-7.2.0.pyc differ diff --git a/scripts/cumulus_upload_data.py b/scripts/cumulus_upload_data.py index 72a1f6e..32a887f 100755 --- a/scripts/cumulus_upload_data.py +++ b/scripts/cumulus_upload_data.py @@ -25,16 +25,18 @@ def upload_file(args): try: api_client = boto3.client("apigateway") res = api_client.get_rest_apis() - site_api_dict = list( + api_dict = list( filter( lambda x: "cumulus-aggregator-dev" in x["tags"]["aws:cloudformation:stack-name"], res["items"], ) ) - api_id = site_api_dict[0]["id"] - url = f"https://{api_id}.execute-api.us-east-1.amazonaws.com/dev/" - + for api in api_dict: + if api["name"] == "CumulusAggregatorSiteAPI": + url = ( + f"https://{api['id']}.execute-api.us-east-1.amazonaws.com/dev/" + ) except: print("No response recieved from AWS API gateway.") exit(1) diff --git a/src/handlers/dashboard/get_chart_data.py b/src/handlers/dashboard/get_chart_data.py index 8ed3bd8..370a86e 100644 --- a/src/handlers/dashboard/get_chart_data.py +++ b/src/handlers/dashboard/get_chart_data.py @@ -24,8 +24,8 @@ def _get_table_cols(table_name: str) -> List: """ s3_bucket_name = os.environ.get("BUCKET_NAME") s3_key = ( - f"{BucketPath.CSVAGGREGATE.value}/{table_name.split('_')[0]}" - f"/{table_name}/{table_name}_aggregate.csv" + f"{BucketPath.CSVAGGREGATE.value}/{table_name.split('__')[0]}" + f"/{table_name}/{table_name}__aggregate.csv" ) s3_client = boto3.client("s3") s3_iter = s3_client.get_object( @@ -54,13 +54,14 @@ def _build_query(query_params: Dict, filters: List, path_params: Dict) -> str: group_str = f"{query_params['stratifier']}, {group_str}" columns.remove(query_params["stratifier"]) if len(columns) > 0: - coalesce_str = f"WHERE COALESCE ({','.join(columns)}) = '' AND" + coalesce_str = f"WHERE COALESCE ({','.join(columns)}) IS NOT Null AND" else: coalesce_str = "WHERE" query_str = ( - f"SELECT {select_str} FROM {table} " # nosec + f"SELECT {select_str} " # nosec + f"FROM \"{os.environ.get('GLUE_DB_NAME')}\".\"{table}\" " f"{coalesce_str} " - f"{query_params['column']} != '' {filter_str} " + f"{query_params['column']} IS NOT Null {filter_str} " f"GROUP BY {group_str}" ) return query_str @@ -100,8 +101,9 @@ def chart_data_handler(event, context): query = _build_query(query_params, filters, path_params) df = awswrangler.athena.read_sql_query( query, - database="cumulus-aggregator-db", - s3_output="s3://cumulus-aggregator-site-counts/awswrangler", + database=os.environ.get("GLUE_DB_NAME"), + s3_output=f"s3://{os.environ.get('BUCKET_NAME')}/awswrangler", + workgroup=os.environ.get("WORKGROUP_NAME"), ) res = _format_payload(df, query_params, filters) res = http_response(200, res) diff --git a/src/handlers/dashboard/get_subscriptions.py b/src/handlers/dashboard/get_subscriptions.py new file mode 100644 index 0000000..c6f0e15 --- /dev/null +++ b/src/handlers/dashboard/get_subscriptions.py @@ -0,0 +1,34 @@ +""" Lambda for retrieving list of available subscriptions +""" + +import os + +import awswrangler +import boto3 + +from src.handlers.shared.decorators import generic_error_handler +from src.handlers.shared.functions import http_response + + +@generic_error_handler(msg="Error retrieving subscriptions") +def subscriptions_handler(event, context): + """Retrieves list of subscriptions in Athena DB.""" + del event + del context + boto3.setup_default_session(region_name="us-east-1") + db = os.environ.get("GLUE_DB_NAME") + # TODO: If awswrangler adds support for SHOW TABLE sql queries, cut this + # query over to that format - it's much faster. + df = awswrangler.athena.read_sql_query( + ( + f"SELECT table_name FROM information_schema.tables " + f"WHERE table_schema = '{db}'" # nosec + ), + database=db, + s3_output=f"s3://{os.environ.get('BUCKET_NAME')}/awswrangler", + workgroup=os.environ.get("WORKGROUP_NAME"), + ) + # df.iloc[:, 0] is getting the first pandas Series from the awswrangler datafame. + # Should be insulated from the change mentioned above + res = http_response(200, df.iloc[:, 0].to_json(orient="values")) + return res diff --git a/src/handlers/site_upload/powerset_merge.py b/src/handlers/site_upload/powerset_merge.py index ec660fc..6f39af8 100644 --- a/src/handlers/site_upload/powerset_merge.py +++ b/src/handlers/site_upload/powerset_merge.py @@ -95,7 +95,12 @@ def concat_sets(df: pandas.DataFrame, file_path: str) -> pandas.DataFrame: # but at some point, we may have different kinds of counts, like "cnt_encounter". # We'll need to modify this once we know a bit more about the final design. data_cols.remove("cnt") - return pandas.concat([df, site_df]).groupby(data_cols).sum().reset_index() + return ( + pandas.concat([df, site_df]) + .groupby(data_cols, dropna=False) + .sum() + .reset_index() + ) def get_site_filename_suffix(s3_path: str): diff --git a/template.yaml b/template.yaml index 11f5aeb..5a8e6c5 100644 --- a/template.yaml +++ b/template.yaml @@ -70,7 +70,7 @@ Resources: PowersetMergeFunction: Type: AWS::Serverless::Function Properties: - FunctionName: !Sub 'CumulusAggPowersetMerge--${DeployStage}' + FunctionName: !Sub 'CumulusAggPowersetMerge-${DeployStage}' Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1] Handler: src/handlers/site_upload/powerset_merge.powerset_merge_handler Runtime: python3.9 @@ -99,6 +99,8 @@ Resources: Environment: Variables: BUCKET_NAME: !Sub '${BucketNameParameter}-${DeployStage}' + GLUE_DB_NAME: !Sub '${GlueNameParameter}-${DeployStage}' + WORKGROUP_NAME: !Sub '${AthenaWorkgroupNameParameter}-${DeployStage}' Events: GetChartDataAPI: Type: Api @@ -108,7 +110,7 @@ Resources: Method: GET Policies: - S3CrudPolicy: - BucketName: !Sub '${BucketNameParameter}-${DeployStage}' + BucketName: !Ref AggregatorBucket - Statement: - Sid: GluePermissionsPolicy Effect: Allow @@ -117,18 +119,14 @@ Resources: - glue:*Partition* Resource: - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog' - - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${GlueNameParameter}-db-${DeployStage}' - - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${GlueNameParameter}-db-${DeployStage}/*' + - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${GlueNameParameter}-${DeployStage}' + - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${GlueNameParameter}-${DeployStage}/*' - Statement: - Sid: AthenaExecuteQueryPermissionsPolicy Effect: Allow Action: - - athena:StartQueryExecution - - athena:GetQueryResults - - athena:GetWorkGroup - - athena:GetQueryExecution - - athena:StopQueryExecution - Resource: !Sub 'arn:aws:athena:*:${AWS::AccountId}:workgroup/${AthenaWorkgroupNameParameter}-${DeployStage}' + - athena:* + Resource: !Sub 'arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/${AthenaWorkgroupNameParameter}-${DeployStage}' DashboardGetMetadataFunction: Type: AWS::Serverless::Function @@ -169,8 +167,58 @@ Resources: Method: GET Policies: - S3ReadPolicy: - BucketName: !Ref BucketNameParameter + BucketName: !Sub '${BucketNameParameter}-${DeployStage}' + DashboardGetSubscriptionsFunction: + Type: AWS::Serverless::Function + Properties: + FunctionName: !Sub 'CumulusAggDashboardGetSubscriptions-${DeployStage}' + Layers: [arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python39:1] + Handler: src/handlers/dashboard/get_subscriptions.subscriptions_handler + Runtime: python3.9 + MemorySize: 512 + Timeout: 100 + Description: Retrieve data for chart display in Cumulus Dashboard + Environment: + Variables: + BUCKET_NAME: !Sub '${BucketNameParameter}-${DeployStage}' + GLUE_DB_NAME: !Sub '${GlueNameParameter}-${DeployStage}' + WORKGROUP_NAME: !Sub '${AthenaWorkgroupNameParameter}-${DeployStage}' + Events: + GetSubscriptionsAPI: + Type: Api + Properties: + RestApiId: !Ref DashboardApiGateway + Path: /subscriptions + Method: GET + # TODO: it :should: be possible to move these policies to a central role/policy + # set that can be referenced in multiple places; see + # https://stackoverflow.com/questions/64523817/aws-sam-multiple-functions-with-same-inline-policy + # However - this causes a lot of nested policy conflicts that might require a + # more comprehensive policy/role overhaul. + Policies: + - S3CrudPolicy: + BucketName: !Ref AggregatorBucket + - Statement: + - Sid: GlueSubscriptionsPolicy + Effect: Allow + Action: + - glue:*Table* + - glue:*Partition* + Resource: + - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog' + - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${GlueNameParameter}-${DeployStage}' + - !Sub 'arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${GlueNameParameter}-${DeployStage}/*' + - Statement: + - Sid: AthenaSubscriptionsPolicy + Effect: Allow + Action: + - athena:StartQueryExecution + - athena:GetQueryResults + - athena:GetWorkGroup + - athena:GetQueryExecution + - athena:StopQueryExecution + Resource: !Sub 'arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/${AthenaWorkgroupNameParameter}-${DeployStage}' ### Lambda permissions @@ -209,7 +257,7 @@ Resources: Properties: CatalogId: !Ref AWS::AccountId DatabaseInput: - Name: !Sub '${GlueNameParameter}-db-${DeployStage}' + Name: !Sub '${GlueNameParameter}-${DeployStage}' Description: Database for serving data to Cumulus Dashboard GlueCrawler: @@ -240,7 +288,18 @@ Resources: - site_metadata/** - site_upload/** - + AthenaWorkGroup: + Type: AWS::Athena::WorkGroup + Properties: + Name: !Sub '${AthenaWorkgroupNameParameter}-${DeployStage}' + State: ENABLED + WorkGroupConfiguration: + EnforceWorkGroupConfiguration: True + PublishCloudWatchMetricsEnabled: True + EngineVersion: + SelectedEngineVersion: "Athena engine version 3" + ResultConfiguration: + OutputLocation: !Sub "s3://${BucketNameParameter}-${DeployStage}/athena/" ### IAM Roles @@ -284,6 +343,7 @@ Resources: - s3:PutObject Resource: !Sub "arn:aws:s3:::${BucketNameParameter}-${DeployStage}/aggregates/*" + ### API Gateways # If you need to enable logging at the API Gateway level, uncomment the nodes diff --git a/tests/conftest.py b/tests/conftest.py index bfe561b..24cc5b9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,7 @@ from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata -from tests.utils import get_mock_metadata, get_mock_auth, TEST_BUCKET, ITEM_COUNT +from tests.utils import get_mock_metadata, ITEM_COUNT, TEST_BUCKET, TEST_GLUE_DB def _init_mock_data(s3_client, bucket_name, site, study, subscription): @@ -46,7 +46,30 @@ def mock_bucket(): s3.stop() -def test_mock_bucket(): +""" Leaving this unused here for now - there are some low level inconsistencies between moto + and AWS wrangler w.r.t. how workgroups are mocked out, but we might be able to + use this in the future/mock AWSwranger below the entrypoint if we are concerned. + + https://stackoverflow.com/a/73208335/5318482 discusses this a bit, but doesn't + adress mocking out the aws workgroup response (though setting the workgroup + to primary helped a bit since it has default permissions). +""" + + +@pytest.fixture +def mock_db(): + athena = mock_athena() + athena.start() + athena_client = boto3.client("athena", region_name="us-east-1") + athena_client.start_query_execution( + QueryString=f"create database {TEST_GLUE_DB}", + ResultConfiguration={"OutputLocation": f"s3://{TEST_BUCKET}/athena/"}, + ) + yield + athena.stop() + + +def test_mock_bucket(mock_bucket): s3_client = boto3.client("s3", region_name="us-east-1") item = s3_client.list_objects_v2(Bucket=TEST_BUCKET) assert (len(item["Contents"])) == ITEM_COUNT diff --git a/tests/dashboard/test_get_chart_data.py b/tests/dashboard/test_get_chart_data.py index f7625d3..643afe9 100644 --- a/tests/dashboard/test_get_chart_data.py +++ b/tests/dashboard/test_get_chart_data.py @@ -1,4 +1,5 @@ import json +import os import pandas import pytest @@ -6,6 +7,7 @@ from unittest import mock from src.handlers.dashboard import get_chart_data +from tests.utils import MOCK_ENV, TEST_BUCKET, TEST_GLUE_DB, TEST_WORKGROUP def mock_get_table_cols(name): @@ -22,6 +24,7 @@ def mock_data_frame(filter): @mock.patch( "src.handlers.dashboard.get_chart_data._get_table_cols", mock_get_table_cols ) +@mock.patch.dict(os.environ, MOCK_ENV) @pytest.mark.parametrize( "query_params,filters,path_params,query_str", [ @@ -29,24 +32,24 @@ def mock_data_frame(filter): {"column": "gender"}, [], {"subscription_name": "test_study"}, - "SELECT gender, sum(cnt) as cnt FROM test_study " - "WHERE COALESCE (race) = '' AND gender != '' " + f'SELECT gender, sum(cnt) as cnt FROM "{TEST_GLUE_DB}"."test_study" ' + "WHERE COALESCE (race) IS NOT Null AND gender IS NOT Null " "GROUP BY gender", ), ( {"column": "gender", "stratifier": "race"}, [], {"subscription_name": "test_study"}, - "SELECT race, gender, sum(cnt) as cnt FROM test_study " - "WHERE gender != '' " + f'SELECT race, gender, sum(cnt) as cnt FROM "{TEST_GLUE_DB}"."test_study" ' + "WHERE gender IS NOT Null " "GROUP BY race, gender", ), ( {"column": "gender"}, ["gender:strEq:female"], {"subscription_name": "test_study"}, - "SELECT gender, sum(cnt) as cnt FROM test_study " - "WHERE COALESCE (race) = '' AND gender != '' " + f'SELECT gender, sum(cnt) as cnt FROM "{TEST_GLUE_DB}"."test_study" ' + "WHERE COALESCE (race) IS NOT Null AND gender IS NOT Null " "AND gender LIKE 'female' " "GROUP BY gender", ), @@ -54,8 +57,8 @@ def mock_data_frame(filter): {"column": "gender", "stratifier": "race"}, ["gender:strEq:female"], {"subscription_name": "test_study"}, - "SELECT race, gender, sum(cnt) as cnt FROM test_study " - "WHERE gender != '' " + f'SELECT race, gender, sum(cnt) as cnt FROM "{TEST_GLUE_DB}"."test_study" ' + "WHERE gender IS NOT Null " "AND gender LIKE 'female' " "GROUP BY race, gender", ), diff --git a/tests/dashboard/test_get_subscriptions.py b/tests/dashboard/test_get_subscriptions.py new file mode 100644 index 0000000..731b886 --- /dev/null +++ b/tests/dashboard/test_get_subscriptions.py @@ -0,0 +1,30 @@ +import os + +from unittest import mock + +import awswrangler +import pandas + +from pytest_mock import MockerFixture + +from src.handlers.dashboard.get_subscriptions import subscriptions_handler +from tests.utils import get_mock_metadata, MOCK_ENV, SUBSCRIPTION_COUNT + + +def mock_response(*args, **kwargs): + meta = get_mock_metadata() + table_names = [] + for site in meta.keys(): + for study in meta[site].keys(): + for subscription in meta[site][study].keys(): + table_names.append(f"{study}__{subscription}") + return pandas.DataFrame.from_dict({"0": list(set(table_names))}) + + +@mock.patch.dict(os.environ, MOCK_ENV) +def test_get_subscriptions(mocker: MockerFixture): + mocker_read = mocker.patch("awswrangler.athena.read_sql_query") + mocker_read.side_effect = mock_response + res = subscriptions_handler({}, {}) + assert res["statusCode"] == 200 + assert SUBSCRIPTION_COUNT == 2 diff --git a/tests/site_upload/test_api_gateway_authorizer.py b/tests/site_upload/test_api_gateway_authorizer.py index 3e90a59..1b40258 100644 --- a/tests/site_upload/test_api_gateway_authorizer.py +++ b/tests/site_upload/test_api_gateway_authorizer.py @@ -3,12 +3,14 @@ from contextlib import nullcontext as does_not_raise +from pytest_mock import MockerFixture + from src.handlers.site_upload.api_gateway_authorizer import lambda_handler from tests.utils import get_mock_auth @pytest.fixture -def mocker_site_json(mocker): +def mocker_site_json(mocker: MockerFixture): mocked_site_json = mocker.mock_open(read_data=json.dumps(get_mock_auth())) mocker.patch("builtins.open", mocked_site_json) @@ -28,4 +30,4 @@ def test_validate_pw(auth, expects, mock_bucket, mocker_site_json): "methodArn": "arn:aws:execute-api:us-east-1:11223:123/Prod/post/lambda", } with expects: - auth = lambda_handler(event, {}) + res = lambda_handler(event, {}) diff --git a/tests/utils.py b/tests/utils.py index 8715c81..21ff2a5 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,18 @@ """Storage for state variables/methods shared by test modules""" + TEST_BUCKET = "cumulus-aggregator-site-counts-test" +TEST_WORKGROUP = "cumulus-aggregator-test-wg" +TEST_GLUE_DB = "cumulus-aggregator-test-db" ITEM_COUNT = 7 +SUBSCRIPTION_COUNT = 2 + +# This is a convenience for loading into os.environ with mock.patch.dict. +# Other cases should probably use the getter version below. +MOCK_ENV = { + "BUCKET_NAME": TEST_BUCKET, + "GLUE_DB_NAME": TEST_GLUE_DB, + "WORKGROUP_NAME": TEST_WORKGROUP, +} def get_mock_metadata(): @@ -57,3 +69,7 @@ def get_mock_auth(): # u/a: hope test3 "aG9wZTp0ZXN0Mw==": {"site": "chicago_hope"}, } + + +def get_mock_env(): + return MOCK_ENV