diff --git a/.github/ISSUE_TEMPLATE/onboarding.md b/.github/ISSUE_TEMPLATE/onboarding.md index 86b23a05d6..47b0a7d024 100644 --- a/.github/ISSUE_TEMPLATE/onboarding.md +++ b/.github/ISSUE_TEMPLATE/onboarding.md @@ -80,6 +80,8 @@ Note: If you're not able to do any of these yourself, you're still responsible f - [ ] [Add as a form manager to the touchpoints recruitment intercept](https://touchpoints.app.cloud.gov/admin/forms/9412c559/permissions) **For engineers, also...** +- [ ] Make sure you have a `login.gov` account and have logged into the FAC application at least once. + - [ ] Then, add your email to the `readonly` list in [staffusers.json](../../backend/config/staffusers.json). - [ ] [Add as a member of the FAC group in New Relic](https://one.newrelic.com/admin-portal/organizations/users-list) (@GSA-TTS/fac-admins can do this) **For product leads/owners, also...** @@ -88,5 +90,6 @@ Note: If you're not able to do any of these yourself, you're still responsible f - [ ] Also give them the `Maintainer` role in [the FAC-team team in GitHub](https://github.com/orgs/GSA-TTS/teams/fac-team/members). **For helpdesk, also...** -- [ ] Add them to the list of staff users for [Django Admin](https://app.fac.gov/admin/users/staffuser/). +- [ ] Make sure you have a `login.gov` account and have logged into the FAC application at least once. + - [ ] Then, add your email to the `helpdesk` list in [staffusers.json](../../backend/config/staffusers.json). - [ ] Give them access to the [Help Desk](https://fac-gov.zendesk.com/admin/people/team/members) as a team member. diff --git a/.github/workflows/deploy-application.yml b/.github/workflows/deploy-application.yml index 4f811e4a0c..dd19d7b79a 100644 --- a/.github/workflows/deploy-application.yml +++ b/.github/workflows/deploy-application.yml @@ -77,7 +77,7 @@ jobs: cf_password: ${{ secrets.CF_PASSWORD }} cf_org: gsa-tts-oros-fac cf_space: ${{ env.space }} - command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.5 deploy_backup" + command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.9 deploy_backup" - name: Deploy Preview to cloud.gov if: ${{ inputs.environment == 'preview' }} @@ -103,15 +103,6 @@ jobs: cf_vars_file: backend/manifests/vars/vars-${{ env.space }}.yml command: bin/ops/deploy.sh - - name: Load historical data - uses: cloud-gov/cg-cli-tools@main - with: - cf_username: ${{ secrets.CF_USERNAME }} - cf_password: ${{ secrets.CF_PASSWORD }} - cf_org: gsa-tts-oros-fac - cf_space: ${{ env.space }} - command: cf run-task gsa-fac -k 6G -m 1G --name load_data --command "./load_data.sh" - # This has to happen after an application deployment because the manifest (currently) is responsible # for binding the "logdrain service" to the "gsa-fac application". 
This also needs to be done # based on the suspicion that fluentbit cannot register the incoming logs when it is initially @@ -132,5 +123,5 @@ jobs: secrets: inherit with: environment: ${{ inputs.environment }} - util_version: "v0.1.8" + util_version: "v0.1.9" backup_operation: "check_tables" diff --git a/.github/workflows/fac-api-scheduler.yml b/.github/workflows/fac-api-scheduler.yml new file mode 100644 index 0000000000..521dcae32f --- /dev/null +++ b/.github/workflows/fac-api-scheduler.yml @@ -0,0 +1,24 @@ +--- +name: API Refresh Schedule +on: + schedule: + # Invoke at 12:30 EST + - cron: '30 4 * * *' + workflow_dispatch: null + +jobs: + scheduled-api-standup: + strategy: + fail-fast: false + matrix: + environment: + - name: dev + - name: staging + - name: production + - name: preview + uses: ./.github/workflows/fac-api-standup.yml + secrets: inherit + with: + environment: ${{ matrix.environment.name }} + util_version: "v0.1.9" + backup_operation: "rds_backup" diff --git a/.github/workflows/fac-api-standup.yml b/.github/workflows/fac-api-standup.yml new file mode 100644 index 0000000000..96f3896a87 --- /dev/null +++ b/.github/workflows/fac-api-standup.yml @@ -0,0 +1,54 @@ +--- +name: Standup the API +on: + workflow_call: + inputs: + environment: + required: true + type: string + util_version: + description: Version for fac backup utility to use (ex. vX.Y.Z) + required: true + type: string + backup_operation: + description: Operation for fac-backup-utility + required: true + type: string + workflow_dispatch: + inputs: + environment: + required: true + type: choice + options: + - 'dev' + - 'preview' + - 'staging' + - 'production' + util_version: + description: Version for fac backup utility to use (ex. vX.Y.Z) + required: true + type: string + default: "v0.1.9" + backup_operation: + description: Operation for fac-backup-utility + required: true + type: choice + options: + - 'rds_backup' +jobs: + api-standup: + name: Standup API + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + env: + space: ${{ inputs.environment }} + steps: + - name: Backup FAC Database and refresh the api + uses: cloud-gov/cg-cli-tools@main + with: + cf_username: ${{ secrets.CF_USERNAME }} + cf_password: ${{ secrets.CF_PASSWORD }} + cf_org: gsa-tts-oros-fac + cf_space: ${{ env.space }} + command: cf run-task gsa-fac -k 2G -m 2G --name standup_api --command "./util/nightly_api_refresh.sh ${{ inputs.util_version }} ${{ inputs.backup_operation }}" + # command: cf run-task gsa-fac -k 2G -m 2G --name api_refresh --command "./util/nightly_api_refresh.sh v0.1.9 rds_backup" diff --git a/.github/workflows/fac-backup-scheduler.yml b/.github/workflows/fac-backup-scheduler.yml index 3daea83415..98e42e30c0 100644 --- a/.github/workflows/fac-backup-scheduler.yml +++ b/.github/workflows/fac-backup-scheduler.yml @@ -19,6 +19,5 @@ jobs: secrets: inherit with: environment: ${{ matrix.environment.name }} - util_version: "v0.1.8" + util_version: "v0.1.9" backup_operation: "scheduled_backup" - diff --git a/.github/workflows/fac-backup-util-scheduled.yml b/.github/workflows/fac-backup-util-scheduled.yml index d5992fac3c..4509e38fba 100644 --- a/.github/workflows/fac-backup-util-scheduled.yml +++ b/.github/workflows/fac-backup-util-scheduled.yml @@ -1,8 +1,8 @@ --- name: Backup the database with fac-backup-utility ### Common Commands: -# ./fac-backup-util.sh v0.1.8 scheduled_backup -# ./fac-backup-util.sh v0.1.8 daily_backup +# ./fac-backup-util.sh v0.1.9 scheduled_backup +# ./fac-backup-util.sh v0.1.9 daily_backup on: 
workflow_call: inputs: diff --git a/.github/workflows/fac-backup-util.yml b/.github/workflows/fac-backup-util.yml index 47eed32461..11a7caadc7 100644 --- a/.github/workflows/fac-backup-util.yml +++ b/.github/workflows/fac-backup-util.yml @@ -1,8 +1,8 @@ --- name: Backup the database with fac-backup-utility ### Common Commands: -# ./fac-backup-util.sh v0.1.8 initial_backup -# ./fac-backup-util.sh v0.1.8 deploy_backup +# ./fac-backup-util.sh v0.1.9 initial_backup +# ./fac-backup-util.sh v0.1.9 deploy_backup on: workflow_dispatch: inputs: @@ -18,6 +18,7 @@ on: description: Version for fac backup utility to use (ex. vX.Y.Z) required: true type: string + default: "v0.1.9" backup_operation: description: Operation for fac-backup-utility required: true diff --git a/.github/workflows/fac-check-tables-scheduler.yml b/.github/workflows/fac-check-tables-scheduler.yml index 111ca0029e..a1b22b5593 100644 --- a/.github/workflows/fac-check-tables-scheduler.yml +++ b/.github/workflows/fac-check-tables-scheduler.yml @@ -20,5 +20,5 @@ jobs: secrets: inherit with: environment: ${{ matrix.environment.name }} - util_version: "v0.1.8" + util_version: "v0.1.9" backup_operation: "check_tables" diff --git a/.github/workflows/fac-check-tables.yml b/.github/workflows/fac-check-tables.yml index f45d0f6003..0ce9c2d146 100644 --- a/.github/workflows/fac-check-tables.yml +++ b/.github/workflows/fac-check-tables.yml @@ -1,7 +1,7 @@ --- name: Check existing tables in an environment ### Common Commands: -# ./fac-backup-util.sh v0.1.8 check_tables +# ./fac-backup-util.sh v0.1.9 check_tables on: workflow_dispatch: inputs: diff --git a/backend/.gitignore b/backend/.gitignore new file mode 100644 index 0000000000..1d9488fd11 --- /dev/null +++ b/backend/.gitignore @@ -0,0 +1,2 @@ +*.tar.gz +config.json diff --git a/backend/.profile b/backend/.profile index 544a2ee1cc..c1952d2597 100644 --- a/backend/.profile +++ b/backend/.profile @@ -1,20 +1,14 @@ #!/bin/bash -# Source everything; everything is now a function. -# Remember: bash has no idea if a function exists, -# so a typo in a function name will fail silently. Similarly, -# bash has horrible scoping, so use of `local` in functions is -# critical for cleanliness in the startup script. +set +e + source tools/util_startup.sh -# This will choose the correct environment -# for local envs (LOCAL or TESTING) and cloud.gov source tools/setup_env.sh -source tools/api_teardown.sh +source tools/curation_audit_tracking_disable.sh +source tools/sling_bulk_export.sh source tools/migrate_app_tables.sh -source tools/api_standup.sh -source tools/run_collectstatic.sh source tools/seed_cog_baseline.sh -source tools/materialized_views.sh +source tools/sql_pre_post.sh ##### # SETUP THE CGOV ENVIRONMENT @@ -24,11 +18,13 @@ gonogo "setup_env" if [[ "$CF_INSTANCE_INDEX" == 0 ]]; then ##### - # API TEARDOWN - # API has to be deprecated/removed before migration, because - # of tight coupling between schema/views and the dissemination tables - api_teardown - gonogo "api_teardown" + # SQL PRE + # We have SQL that we want to run before the migrations and sling are run. + # This tears down things that would conflict with migrations, etc. 
+ sql_pre_fac_db + gonogo "sql_pre_fac_db" + curation_audit_tracking_disable + gonogo "curation_audit_tracking_disable" ##### # MIGRATE APP TABLES @@ -36,16 +32,11 @@ if [[ "$CF_INSTANCE_INDEX" == 0 ]]; then gonogo "migrate_app_tables" ##### - # API STANDUP - # Standup the API, which may depend on migration changes - api_standup - gonogo "api_standup" - - ##### - # COLLECT STATIC - # Do Django things with static files. - # run_collectstatic - # gonogo "run_collectstatic" + # SQL POST + # Rebuild the API and prepare the system for execution. + # Runs after migrations. + sql_post_fac_db + gonogo "sql_post_fac_db" ##### # SEED COG/OVER TABLES @@ -53,8 +44,11 @@ if [[ "$CF_INSTANCE_INDEX" == 0 ]]; then seed_cog_baseline gonogo "seed_cog_baseline" - # materialized_views - # gonogo "materialized_views" + ##### + # CREATE STAFF USERS + # Prepares staff users for Django admin + python manage.py create_staffusers + gonogo "create_staffusers" fi # Make psql usable by scripts, for debugging, etc. diff --git a/backend/Dockerfile b/backend/Dockerfile index c9ab79ae02..2e53152aca 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -18,6 +18,7 @@ RUN apt-get -yq update && \ gcc \ gnupg \ gnupg2 \ + jq \ postgresql-client \ wget diff --git a/backend/Makefile b/backend/Makefile index 8e78d3466d..2d5f42a508 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -88,9 +88,9 @@ docker-lint: docker compose run web bash -c 'flake8 && black --check . && bandit -c pyproject.toml -r . && mypy . && djlint .' ghcr-first-run: - docker compose -f docker-compose-web.yml run web python manage.py makemigrations - docker compose -f docker-compose-web.yml run web python manage.py migrate - docker compose -f docker-compose-web.yml run web python manage.py loaddata + -docker compose -f docker-compose-web.yml run web python manage.py makemigrations + -docker compose -f docker-compose-web.yml run web python manage.py migrate + -docker compose -f docker-compose-web.yml run web python manage.py loaddata # Run Django tests with docker ghcr-test: @@ -109,15 +109,33 @@ ghcr-nctest: ghcr-lint: docker compose -f docker-compose-web.yml run web bash -c 'flake8 && black --check . && bandit -c pyproject.toml -r . && python -m pip install types-pytz && mypy . && djlint .' +remove-coverage: + -rm -f .coverage.* + +compose-down: + docker compose down + +remove-processes: + docker rm -f $$(docker ps -a -q) + +remove-volumes: + -docker volume rm $$(docker volume ls -q) + +remove-dangling-volumes: remove-volumes + -docker volume rm $$(docker volume ls -q --filter dangling=true) + +system-prune: + -docker system prune -f + +volume-prune: + -docker volume prune -f + docker-clean: docker compose down docker rm -f $(shell docker ps -a -q) docker volume rm $(shell docker volume ls -q) -docker-full-clean: - rm -f .coverage.* - docker compose down - docker rm -f $(docker ps -a -q) - docker volume rm $(docker volume ls -q) - docker system prune -f - docker volume prune -f +# Proceed past errors. +# We want all of these commands to run, even if one fails. 
+# make -i docker-full-clean +docker-full-clean: remove-coverage compose-down remove-processes remove-dangling-volumes system-prune volume-prune diff --git a/backend/census_historical_migration/README.md b/backend/census_historical_migration/README.md index e6985753ca..75a1ca5b86 100644 --- a/backend/census_historical_migration/README.md +++ b/backend/census_historical_migration/README.md @@ -20,7 +20,7 @@ This is implemented as a Django app to leverage existing management commands and - fac_s3.py - Uploads folders or files to an S3 bucket. ```bash -python manage.py fac_s3 gsa-fac-private-s3 --upload --src census_historical_migration/data +python manage.py fac_s3 fac-private-s3 --upload --src census_historical_migration/data ``` - csv_to_postgres.py - Inserts data into Postgres tables using the contents of the CSV files in the S3 bucket. The first row of each file is assumed to have the column names (we convert to lowercase). The name of the table is determined by examining the name of the file. The sample source files do not have delimters for empty fields at the end of a line - so we assume these are nulls. @@ -44,16 +44,16 @@ python manage.py csv_to_postgres --clean True 1. Download test Census data from https://drive.google.com/drive/folders/1TY-7yWsMd8DsVEXvwrEe_oWW1iR2sGoy into census_historical_migration/data folder. NOTE: Never check in the census_historical_migration/data folder into GitHub. -2. In the FAC/backend folder, run the following to load CSV files from census_historical_migration/data folder into gsa-fac-private-s3 bucket. +2. In the FAC/backend folder, run the following to load CSV files from census_historical_migration/data folder into fac-private-s3 bucket. ```bash docker compose run --rm web python manage.py fac_s3 \ - gsa-fac-private-s3 \ + fac-private-s3 \ --upload \ --src census_historical_migration/data ``` -3. In the FAC/backend folder, run the following to read the CSV files from gsa-fac-private-s3 bucket and load into Postgres. +3. In the FAC/backend folder, run the following to read the CSV files from fac-private-s3 bucket and load into Postgres. ```bash docker compose run --rm web python manage.py \ diff --git a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py index 09005e5f9f..95a2a48320 100644 --- a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py +++ b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py @@ -1,7 +1,7 @@ import argparse import time -from util import ( +from census_historical_migration.throwaway_scripts.util import ( trigger_migration_workflow, ) import subprocess # nosec diff --git a/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py b/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py index b7be200650..12a20a64f9 100644 --- a/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py +++ b/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py @@ -2,7 +2,9 @@ import subprocess # nosec import time -from util import trigger_migration_workflow +from census_historical_migration.throwaway_scripts.util import ( + trigger_migration_workflow, +) # This throwaway script spits out code that can be # copy-pasted into a bash script, or directly into the command line. 
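The changes to backend/config/db_url.py and backend/config/settings.py that follow parameterize the VCAP service lookup so one helper can resolve both the primary database (fac-db) and the snapshot database (fac-snapshot-db). A minimal sketch of the resulting settings wiring, assuming the VCAP_SERVICES shape given in the new config/vcap_services_for_containers.json further down in this diff; reading the fake VCAP file is only the local/container fallback described by the FIXME in settings.py:

import json
import os

import dj_database_url
from config.db_url import get_db_url_from_vcap_services

# cloud.gov sets VCAP_SERVICES; local containers fall back to the checked-in fake copy.
raw_vcap = os.environ.get("VCAP_SERVICES")
if raw_vcap:
    vcap = json.loads(raw_vcap)
else:
    with open("config/vcap_services_for_containers.json") as fp:
        vcap = json.load(fp)

DATABASES = {
    # "default" and "fac-db" both point at the live application database.
    "default": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")),
    "fac-db": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")),
    # "fac-snapshot-db" is the second database instance (host "db2" in the fake VCAP).
    "fac-snapshot-db": dj_database_url.parse(
        get_db_url_from_vcap_services(vcap, "fac-snapshot-db")
    ),
}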
diff --git a/backend/config/db_url.py b/backend/config/db_url.py index d84419065d..39ac0cbd6b 100644 --- a/backend/config/db_url.py +++ b/backend/config/db_url.py @@ -1,12 +1,10 @@ from django.core.exceptions import ImproperlyConfigured -def get_db_url_from_vcap_services( - vcap, -): +def get_db_url_from_vcap_services(vcap, db_instance_name="fac-db"): database_url = None for db_service in vcap.get("aws-rds", []): - if db_service.get("instance_name") == "fac-db": + if db_service.get("instance_name") == db_instance_name: database_url = db_service["credentials"]["uri"] break diff --git a/backend/config/settings.py b/backend/config/settings.py index c05f8b3d00..40ae90cffc 100644 --- a/backend/config/settings.py +++ b/backend/config/settings.py @@ -227,10 +227,23 @@ # Environment specific configurations DEBUG = False + if ENVIRONMENT not in ["SANDBOX", "DEVELOPMENT", "PREVIEW", "STAGING", "PRODUCTION"]: + + # FIXME: This is now identical between local and cloud.gov, because we have + # a "fake" VCAP_SERVICES environment variable. Local DBs and S3 buckets + # can be configured the same way as their cloud equivalents. This can be + # refactored for simpler config loading in the app. + # + # During a build, there won't be an environment variable. Load the + # fake VCAP from the filesystem. + vcap = json.load(open("config/vcap_services_for_containers.json")) + DATABASES = { - "default": env.dj_db_url( - "DATABASE_URL", default="postgres://postgres:password@0.0.0.0/backend" + "default": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")), + "fac-db": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")), + "fac-snapshot-db": dj_database_url.parse( + get_db_url_from_vcap_services(vcap, "fac-snapshot-db") ), } STORAGES = { @@ -255,7 +268,8 @@ CORS_ALLOWED_ORIGINS += ["http://0.0.0.0:8000", "http://127.0.0.1:8000"] # Private bucket - AWS_PRIVATE_STORAGE_BUCKET_NAME = "gsa-fac-private-s3" + AWS_PRIVATE_STORAGE_BUCKET_NAME = "fac-private-s3" + AWS_PUBLIC_STORAGE_BUCKET_NAME = "fac-public-s3" AWS_S3_PRIVATE_REGION_NAME = os.environ.get( "AWS_S3_PRIVATE_REGION_NAME", "us-east-1" @@ -263,9 +277,11 @@ # MinIO only matters for local development and GitHub action environments. 
# These should match what we're setting in backend/run.sh - AWS_PRIVATE_ACCESS_KEY_ID = os.environ.get("AWS_PRIVATE_ACCESS_KEY_ID", "longtest") + AWS_PRIVATE_ACCESS_KEY_ID = os.environ.get( + "AWS_PRIVATE_ACCESS_KEY_ID", "singleauditclearinghouse" + ) AWS_PRIVATE_SECRET_ACCESS_KEY = os.environ.get( - "AWS_PRIVATE_SECRET_ACCESS_KEY", "longtest" + "AWS_PRIVATE_SECRET_ACCESS_KEY", "singleauditclearinghouse" ) AWS_S3_PRIVATE_ENDPOINT = os.environ.get( "AWS_S3_PRIVATE_ENDPOINT", "http://minio:9000" @@ -297,8 +313,14 @@ vcap = json.loads(env.str("VCAP_SERVICES")) - DB_URL = get_db_url_from_vcap_services(vcap) - DATABASES = {"default": dj_database_url.parse(DB_URL)} + # DB_URL = get_db_url_from_vcap_services(vcap) + DATABASES = { + "default": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")), + "fac-db": dj_database_url.parse(get_db_url_from_vcap_services(vcap, "fac-db")), + "fac-snapshot-db": dj_database_url.parse( + get_db_url_from_vcap_services(vcap, "fac-snapshot-db") + ), + } for service in vcap["s3"]: if service["instance_name"] == "fac-public-s3": diff --git a/backend/config/staffusers.json b/backend/config/staffusers.json new file mode 100644 index 0000000000..fd8f32c2c9 --- /dev/null +++ b/backend/config/staffusers.json @@ -0,0 +1,21 @@ +{ + "readonly": [ + "alexander.steel@gsa.gov" + ], + "helpdesk": [ + "philip.dominguez@gsa.gov", + "robert.novak@gsa.gov", + "analyn.delossantos@gsa.gov", + "hassandeme.mamasambo@gsa.gov", + "iman.ali@gsa.gov", + "james.p.mason@gsa.gov", + "james.person@gsa.gov", + "leigh.cox@gsa.gov", + "laura.herring@gsa.gov", + "rochelle.ribeiro@gsa.gov" + ], + "superuser": [ + "matthew.jadud@gsa.gov", + "daniel.swick@gsa.gov" + ] +} diff --git a/backend/config/vcap_services_for_containers.json b/backend/config/vcap_services_for_containers.json new file mode 100644 index 0000000000..484ec221b6 --- /dev/null +++ b/backend/config/vcap_services_for_containers.json @@ -0,0 +1,118 @@ +{ + "s3": [ + { + "label": "s3", + "provider": "minio-local", + "plan": "basic", + "name": "fac-private-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDALPHA1", + "instance_name": "fac-private-s3", + "binding_guid": "UUIDALPHA2", + "binding_name": null, + "credentials": { + "uri": "http://minio:9000", + "port": 9000, + "insecure_skip_verify": false, + "access_key_id": "singleauditclearinghouse", + "secret_access_key": "singleauditclearinghouse", + "region": "us-east-1", + "bucket": "fac-private-s3", + "endpoint": "http://minio:9000", + "fips_endpoint": "http://minio:9000", + "additional_buckets": [] + }, + "syslog_drain_url": "https://ALPHA.drain.url", + "volume_mounts": [ + "no_mounts" + ] + }, + { + "label": "s3", + "provider": "minio-local", + "plan": "basic", + "name": "fac-public-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDALPHA1", + "instance_name": "fac-public-s3", + "binding_guid": "UUIDALPHA2", + "binding_name": null, + "credentials": { + "uri": "http://minio:9000", + "port": 9000, + "insecure_skip_verify": false, + "access_key_id": "singleauditclearinghouse", + "secret_access_key": "singleauditclearinghouse", + "region": "us-east-1", + "bucket": "fac-public-s3", + "endpoint": "http://minio:9000", + "fips_endpoint": "http://minio:9000", + "additional_buckets": [] + }, + "syslog_drain_url": "https://ALPHA.drain.url", + "volume_mounts": [ + "no_mounts" + ] + } + ], + "aws-rds": [ + { + "label": "fac-db", + "provider": null, + "plan": null, + "name": "fac-db", + "tags": [ + "database", + "docker" 
+ ], + "instance_guid": "UUIDINDIA1", + "instance_name": "fac-db", + "binding_guid": "UUIDINDIA2", + "binding_name": null, + "credentials": { + "db_name": "postgres", + "host": "db", + "name": "postgres", + "password": "", + "port": "5432", + "uri": "postgres://postgres@db:5432/postgres?sslmode=disable", + "username": "postgres" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "fac-snapshot-db", + "provider": null, + "plan": null, + "name": "fac-snapshot-db", + "tags": [ + "database", + "docker" + ], + "instance_guid": "UUIDJULIET1", + "instance_name": "fac-snapshot-db", + "binding_guid": "UUIDJULIET2", + "binding_name": null, + "credentials": { + "db_name": "postgres", + "host": "db2", + "name": "postgres", + "password": "", + "port": "5432", + "uri": "postgres://postgres@db2:5432/postgres?sslmode=disable", + "username": "postgres" + }, + "syslog_drain_url": null, + "volume_mounts": [] + } + ] +} diff --git a/backend/cypress/support/dissemination-table-via-pdf.js b/backend/cypress/support/dissemination-table-via-pdf.js new file mode 100644 index 0000000000..173a814fb0 --- /dev/null +++ b/backend/cypress/support/dissemination-table-via-pdf.js @@ -0,0 +1,118 @@ +/* + Re-useable code for testing the dissemination table. +*/ + +// We're testing a 2x2. Actually, this would be better as a table, as this is a 3D test. +// is tribal is not tribal +// ┌────────────────────────────────┬────────────────────────────────┐ +// │ │ │ +// │ │ │ +// │ │ │ +// │ │ │ +// public │ UNPRIVILEGED KEY OK │ UNPRIVILEGED KEY OK │ +// │ │ │ +// │ │ │ +// │ │ │ +// │ │ │ +// ├────────────────────────────────┼────────────────────────────────┤ +// │ │ │ +// │ │ │ +// │ │ │ +// │ │ │ +// private │ PRIV KEY OK / UNPRIV NO │ DOES NOT COMPUTE │ +// │ │ │ +// │ │ │ +// │ │ │ +// │ │ │ +// └────────────────────────────────┴────────────────────────────────┘ + +// This could be reworked into a table test. +// However, the tests below should be clear +// enough to not require a full re-working. 
+ +// Where +// T = Tribal(1)/Not Tribal(0) +// P = Private(1)/Public(0) +// U = Privileged(1)/Unprivileged(0) +// L = Length expected + +// T P U L +// ------- +// 0 0 0 1 +// 0 0 1 1 +// 0 1 0 - +// 0 1 1 - +// 1 0 0 1 +// 1 0 1 1 +// 1 1 0 0 +// 1 1 1 1 + + + +export function testSubmissionAccessViaPDF(reportId, isTribal, isPublic) { + console.log(`reportId: ${reportId}, isTribal: ${isTribal}, isPublic: ${isPublic}`); + + // The audit IS tribal and IS public + //////////////////////////////////////// + if (isTribal && isPublic) { + // When it is Tribal and public, we should always + // find the report id in the public and private endpoints + expect(isTribal).to.be.true + expect(isPublic).to.be.true + // We should be able to grab the PDF by URL + // https://app.fac.gov/dissemination/report/pdf/2023-04-GSAFAC-0000050825 + cy.request({ + method: 'GET', + url: '/dissemination/report/pdf/' + reportId + }).should((response) => { + expect(response.isOkStatusCode).to.equal(true); + }); + } + //////////////////////////////////////// + // The audit IS tribal and IS NOT public + //////////////////////////////////////// + else if (isTribal && !isPublic) { + expect(isTribal).to.be.true + expect(isPublic).to.be.false + cy.request({ + method: 'GET', + url: '/dissemination/report/pdf/' + reportId, + failOnStatusCode: false, + }).should((response) => { + expect(response.isOkStatusCode).to.equal(false); + }); + + } + //////////////////////////////////////// + // The audit IS NOT tribal and IS public + //////////////////////////////////////// + else if (!isTribal && isPublic) { + // This is a standard audit. + expect(isTribal).to.be.false + expect(isPublic).to.be.true + // We should always find it in all endpoints, priv or unpriv. + cy.request({ + method: 'GET', + url: '/dissemination/report/pdf/' + reportId + }).should((response) => { + expect(response.isOkStatusCode).to.equal(true); + }); + } + //////////////////////////////////////// + // The audit IS NOT tribal and IS NOT public + // (This is not possible.) + //////////////////////////////////////// + else if (!isTribal && !isPublic) { + console.log("Unreachable test case in testTribalAccess"); + expect(true).to.be.false; + } + //////////////////////////////////////// + // The audit somehow is none of the above. + // (This is not possible.) + //////////////////////////////////////// + else { + // We really should never be here. 
+ console.log("The universe broke in testTribalAccess"); + expect(false).to.be.true; + }; +}; diff --git a/backend/cypress/support/full-submission.js b/backend/cypress/support/full-submission.js index 4fb304e486..38aded83c0 100644 --- a/backend/cypress/support/full-submission.js +++ b/backend/cypress/support/full-submission.js @@ -5,7 +5,7 @@ import { testAuditInformationForm } from './audit-info-form.js'; import { testPdfAuditReport } from './report-pdf.js'; import { testAuditorCertification } from './auditor-certification.js'; import { testAuditeeCertification } from './auditee-certification.js'; -import { testSubmissionAccess } from './dissemination-table.js'; +import { testSubmissionAccessViaPDF } from './dissemination-table-via-pdf.js'; import { testTribalAuditPublic, testTribalAuditPrivate } from './tribal-audit-form.js'; import { testInitializeAudit } from './initialize-audit.js'; import { testUnlock } from './unlock-cert.js'; @@ -133,7 +133,8 @@ export function testFullSubmission(isTribal, isPublic) { /audits are complete/ ).siblings().contains('td', reportId); - testSubmissionAccess(reportId, isTribal, isPublic); + // FIXME Ticketed as + testSubmissionAccessViaPDF(reportId, isTribal, isPublic); }); testLogoutGov(); diff --git a/backend/dissemination/admin.py b/backend/dissemination/admin.py index cc1e4115a2..82b94f2e3e 100644 --- a/backend/dissemination/admin.py +++ b/backend/dissemination/admin.py @@ -1,5 +1,4 @@ from django.contrib import admin - from dissemination.models import ( AdditionalEin, AdditionalUei, @@ -11,7 +10,9 @@ Note, Passthrough, SecondaryAuditor, + TribalApiAccessKeyIds, ) +import datetime class AdditionalEinAdmin(admin.ModelAdmin): @@ -257,6 +258,29 @@ def has_view_permission(self, request, obj=None): search_fields = ("report_id",) +class TribalApiAccessKeyIdsAdmin(admin.ModelAdmin): + + list_display = ( + "email", + "key_id", + "date_added", + ) + + search_fields = ( + "email", + "key_id", + ) + + fields = [ + "email", + "key_id", + ] + + def save_model(self, request, obj, form, change): + obj.date_added = datetime.date.today() + super().save_model(request, obj, form, change) + + admin.site.register(AdditionalEin, AdditionalEinAdmin) admin.site.register(AdditionalUei, AdditionalUeiAdmin) admin.site.register(CapText, CapTextAdmin) @@ -267,3 +291,4 @@ def has_view_permission(self, request, obj=None): admin.site.register(Note, NoteAdmin) admin.site.register(Passthrough, PassThroughAdmin) admin.site.register(SecondaryAuditor, SecondaryAuditorAdmin) +admin.site.register(TribalApiAccessKeyIds, TribalApiAccessKeyIdsAdmin) diff --git a/backend/dissemination/api/api/drop_schema.sql b/backend/dissemination/api/api/drop_schema.sql deleted file mode 100644 index 4613e1c1e4..0000000000 --- a/backend/dissemination/api/api/drop_schema.sql +++ /dev/null @@ -1,10 +0,0 @@ - -begin; - -DROP SCHEMA IF EXISTS api CASCADE; - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/base.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/base.sql deleted file mode 100644 index 37e9d7347f..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/base.sql +++ /dev/null @@ -1,5 +0,0 @@ -begin; -select 1; -commit; - -notify pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_functions.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/create_functions.sql deleted file mode 100644 index 37e9d7347f..0000000000 --- 
a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_functions.sql +++ /dev/null @@ -1,5 +0,0 @@ -begin; -select 1; -commit; - -notify pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_schema.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/create_schema.sql deleted file mode 100644 index 127b6b4319..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_schema.sql +++ /dev/null @@ -1,42 +0,0 @@ --- This schema is handled external to the app. --- Why? --- It relies on static tables that are loaded before the app exists. --- Therefore, we assume those tables are loaded. Or, mostly assume. --- This grants permissions, nothing more. - -begin; - -do -$$ -begin - -- If it exists, grant permissions. - if exists (select schema_name from information_schema.schemata where schema_name = 'api_historic_v0_1_0_alpha') then - -- Grant access to tables and views - alter default privileges - in schema api_historic_v0_1_0_alpha - grant select - -- this includes views - on tables - to api_fac_gov; - - -- Grant access to sequences, if we have them - grant usage on schema api_historic_v0_1_0_alpha to api_fac_gov; - grant select, usage on all sequences in schema api_historic_v0_1_0_alpha to api_fac_gov; - alter default privileges - in schema api_historic_v0_1_0_alpha - grant select, usage - on sequences - to api_fac_gov; - - GRANT SELECT ON ALL TABLES IN SCHEMA api_historic_v0_1_0_alpha TO api_fac_gov; - end if; -end -$$ -; - -select 1; - -commit; - -notify pgrst, 'reload schema'; - diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_views.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/create_views.sql deleted file mode 100644 index 37e9d7347f..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/create_views.sql +++ /dev/null @@ -1,5 +0,0 @@ -begin; -select 1; -commit; - -notify pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_schema.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_schema.sql deleted file mode 100644 index 37e9d7347f..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_schema.sql +++ /dev/null @@ -1,5 +0,0 @@ -begin; -select 1; -commit; - -notify pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_views.sql b/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_views.sql deleted file mode 100644 index 37e9d7347f..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/drop_views.sql +++ /dev/null @@ -1,5 +0,0 @@ -begin; -select 1; -commit; - -notify pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_historic_v0_1_0_alpha/views.py b/backend/dissemination/api/api_historic_v0_1_0_alpha/views.py deleted file mode 100644 index 993dd3be76..0000000000 --- a/backend/dissemination/api/api_historic_v0_1_0_alpha/views.py +++ /dev/null @@ -1,49 +0,0 @@ -schema = "api_historic_v0_1_0_alpha" -prefix = "census_" - -tables = { - "agency": (16, 22), - "captext": (19, 22), - "captext_formatted": (19, 22), - "cfda": (16, 22), - "cpas": (16, 22), - "duns": (16, 22), - "eins": (16, 22), - "findings": (16, 22), - "findingstext": (19, 22), - "findingstext_formatted": (19, 22), - "gen": (16, 22), - "notes": (19, 22), - "passthrough": (16, 22), - "revisions": (19, 22), - "ueis": (22, 22), -} - - -def just_table_names(lot): - return list(tables.keys()) - - -def generate_views(tbs): - print("begin;\n") - for t, rng in tbs.items(): - # 
Range is exclusive on the second value - for v in range(rng[0], rng[1] + 1): - print(f"create view {schema}.{t}{v} as") - print("\tselect *") - print(f"\tfrom {prefix}{t}{v}") - print(f"\torder by {prefix}{t}{v}.id") - print(";\n") - print("commit;") - print("notify pgrst, 'reload schema';") - - -if __name__ in "__main__": - generate_views(tables) - -# (define (generate-drops lot) -# (printf "begin;~n~n") -# (for ([t lot]) -# (printf "drop table if exists ~a.~a;~n" schema t)) -# (printf "commit;~n") -# (printf "notify pgrst, 'reload schema';~n")) diff --git a/backend/dissemination/api/api_v1_0_3/base.sql b/backend/dissemination/api/api_v1_0_3/base.sql deleted file mode 100644 index dedabe0cb7..0000000000 --- a/backend/dissemination/api/api_v1_0_3/base.sql +++ /dev/null @@ -1,29 +0,0 @@ -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'authenticator') THEN - RAISE NOTICE 'Role "authenticator" already exists. Skipping.'; - ELSE - CREATE ROLE authenticator LOGIN NOINHERIT NOCREATEDB NOCREATEROLE NOSUPERUSER; - END IF; -END -$do$; - -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'api_fac_gov') THEN - RAISE NOTICE 'Role "api_fac_gov" already exists. Skipping.'; - ELSE - CREATE ROLE api_fac_gov NOLOGIN; - END IF; -END -$do$; - -GRANT api_fac_gov TO authenticator; - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_0_3/create_functions.sql b/backend/dissemination/api/api_v1_0_3/create_functions.sql deleted file mode 100644 index 62d2b400e7..0000000000 --- a/backend/dissemination/api/api_v1_0_3/create_functions.sql +++ /dev/null @@ -1,24 +0,0 @@ --- WARNING --- Under PostgreSQL 12, the functions below work. --- Under PostgreSQL 14, these will break. --- --- Note the differences: --- --- raise info 'Works under PostgreSQL 12'; --- raise info 'request.header.x-magic %', (SELECT current_setting('request.header.x-magic', true)); --- raise info 'request.jwt.claim.expires %', (SELECT current_setting('request.jwt.claim.expires', true)); --- raise info 'Works under PostgreSQL 14'; --- raise info 'request.headers::json->>x-magic %', (SELECT current_setting('request.headers', true)::json->>'x-magic'); --- raise info 'request.jwt.claims::json->expires %', (SELECT current_setting('request.jwt.claims', true)::json->>'expires'); --- --- To quote the work of Dav Pilkey, "remember this now." 
- --- We don't grant tribal access (yet) -create or replace function api_v1_0_3_functions.has_tribal_data_access() returns boolean -as $has_tribal_data_access$ -BEGIN - RETURN 0::BOOLEAN; -END; -$has_tribal_data_access$ LANGUAGE plpgsql; - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_0_3/create_schema.sql b/backend/dissemination/api/api_v1_0_3/create_schema.sql deleted file mode 100644 index 089e746f2f..0000000000 --- a/backend/dissemination/api/api_v1_0_3/create_schema.sql +++ /dev/null @@ -1,52 +0,0 @@ -begin; - -do -$$ -begin - DROP SCHEMA IF EXISTS api_v1_0_3 CASCADE; - DROP SCHEMA IF EXISTS api_v1_0_3_functions CASCADE; - - if not exists (select schema_name from information_schema.schemata where schema_name = 'api_v1_0_3') then - create schema api_v1_0_3; - create schema api_v1_0_3_functions; - - grant usage on schema api_v1_0_3_functions to api_fac_gov; - - -- Grant access to tables and views - alter default privileges - in schema api_v1_0_3 - grant select - -- this includes views - on tables - to api_fac_gov; - - -- Grant access to sequences, if we have them - grant usage on schema api_v1_0_3 to api_fac_gov; - grant select, usage on all sequences in schema api_v1_0_3 to api_fac_gov; - alter default privileges - in schema api_v1_0_3 - grant select, usage - on sequences - to api_fac_gov; - end if; -end -$$ -; - --- This is the description -COMMENT ON SCHEMA api_v1_0_3 IS - 'The FAC dissemation API version 1.0.3.' -; - --- https://postgrest.org/en/stable/references/api/openapi.html --- This is the title -COMMENT ON SCHEMA api_v1_0_3 IS -$$v1.0.3 - -A RESTful API that serves data from the SF-SAC.$$; - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/api/api_v1_0_3/drop_schema.sql b/backend/dissemination/api/api_v1_0_3/drop_schema.sql deleted file mode 100644 index cf1aca6d91..0000000000 --- a/backend/dissemination/api/api_v1_0_3/drop_schema.sql +++ /dev/null @@ -1,11 +0,0 @@ - -begin; - -DROP SCHEMA IF EXISTS api_v1_0_3 CASCADE; --- DROP ROLE IF EXISTS authenticator; --- DROP ROLE IF EXISTS api_fac_gov; - -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/api/api_v1_0_3/drop_views.sql b/backend/dissemination/api/api_v1_0_3/drop_views.sql deleted file mode 100644 index 2775087977..0000000000 --- a/backend/dissemination/api/api_v1_0_3/drop_views.sql +++ /dev/null @@ -1,17 +0,0 @@ -begin; - drop table if exists api_v1_0_3.metadata; - drop view if exists api_v1_0_3.general; - drop view if exists api_v1_0_3.auditor; - drop view if exists api_v1_0_3.federal_awards; - drop view if exists api_v1_0_3.findings; - drop view if exists api_v1_0_3.findings_text; - drop view if exists api_v1_0_3.corrective_action_plans; - drop view if exists api_v1_0_3.additional_ueis; - drop view if exists api_v1_0_3.notes_to_sefa; - drop view if exists api_v1_0_3.passthrough; - drop view if exists api_v1_0_3.secondary_auditors; - drop view if exists api_v1_0_3.additional_eins; -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_0/base.sql b/backend/dissemination/api/api_v1_1_0/base.sql deleted file mode 100644 index dedabe0cb7..0000000000 --- a/backend/dissemination/api/api_v1_1_0/base.sql +++ /dev/null @@ -1,29 +0,0 @@ -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'authenticator') THEN - RAISE NOTICE 'Role "authenticator" already exists. 
Skipping.'; - ELSE - CREATE ROLE authenticator LOGIN NOINHERIT NOCREATEDB NOCREATEROLE NOSUPERUSER; - END IF; -END -$do$; - -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'api_fac_gov') THEN - RAISE NOTICE 'Role "api_fac_gov" already exists. Skipping.'; - ELSE - CREATE ROLE api_fac_gov NOLOGIN; - END IF; -END -$do$; - -GRANT api_fac_gov TO authenticator; - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_0/create_functions.sql b/backend/dissemination/api/api_v1_1_0/create_functions.sql deleted file mode 100644 index 248a259cc2..0000000000 --- a/backend/dissemination/api/api_v1_1_0/create_functions.sql +++ /dev/null @@ -1,59 +0,0 @@ --- WARNING --- Under PostgreSQL 12, the functions below work. --- Under PostgreSQL 14, these will break. --- --- Note the differences: --- --- raise info 'Works under PostgreSQL 12'; --- raise info 'request.header.x-magic %', (SELECT current_setting('request.header.x-magic', true)); --- raise info 'request.jwt.claim.expires %', (SELECT current_setting('request.jwt.claim.expires', true)); --- raise info 'Works under PostgreSQL 14'; --- raise info 'request.headers::json->>x-magic %', (SELECT current_setting('request.headers', true)::json->>'x-magic'); --- raise info 'request.jwt.claims::json->expires %', (SELECT current_setting('request.jwt.claims', true)::json->>'expires'); --- --- To quote the work of Dav Pilkey, "remember this now." - - -CREATE OR REPLACE FUNCTION api_v1_1_0_functions.get_header(item text) RETURNS text - AS $get_header$ - declare res text; - begin - SELECT (current_setting('request.headers', true)::json)->>item into res; - return res; - end; -$get_header$ LANGUAGE plpgsql; - -create or replace function api_v1_1_0_functions.get_api_key_uuid() returns TEXT -as $gaku$ -declare uuid text; -begin - select api_v1_1_0_functions.get_header('x-api-user-id') into uuid; - return uuid; -end; -$gaku$ LANGUAGE plpgsql; - -create or replace function api_v1_1_0_functions.has_tribal_data_access() -returns boolean -as $has_tribal_data_access$ -DECLARE - uuid_header UUID; - key_exists boolean; -BEGIN - - SELECT api_v1_1_0_functions.get_api_key_uuid() INTO uuid_header; - SELECT - CASE WHEN EXISTS ( - SELECT key_id - FROM public.dissemination_TribalApiAccessKeyIds taaki - WHERE taaki.key_id = uuid_header::TEXT) - THEN 1::BOOLEAN - ELSE 0::BOOLEAN - END - INTO key_exists; - RAISE INFO 'api_v1_1_0 has_tribal % %', uuid_header, key_exists; - RETURN key_exists; -END; -$has_tribal_data_access$ LANGUAGE plpgsql; - - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_0/create_schema.sql b/backend/dissemination/api/api_v1_1_0/create_schema.sql deleted file mode 100644 index 33b9bd4161..0000000000 --- a/backend/dissemination/api/api_v1_1_0/create_schema.sql +++ /dev/null @@ -1,48 +0,0 @@ -begin; - -do -$$ -begin - DROP SCHEMA IF EXISTS api_v1_1_0 CASCADE; - DROP SCHEMA IF EXISTS api_v1_1_0_functions CASCADE; - - if not exists (select schema_name from information_schema.schemata where schema_name = 'api_v1_1_0') then - create schema api_v1_1_0; - create schema api_v1_1_0_functions; - - grant usage on schema api_v1_1_0_functions to api_fac_gov; - - -- Grant access to tables and views - alter default privileges - in schema api_v1_1_0 - grant select - -- this includes views - on tables - to api_fac_gov; - - -- Grant access to sequences, if we have them - grant usage on schema api_v1_1_0 to api_fac_gov; - grant select, usage on all sequences in schema api_v1_1_0 to api_fac_gov; - 
alter default privileges - in schema api_v1_1_0 - grant select, usage - on sequences - to api_fac_gov; - end if; -end -$$ -; - --- https://postgrest.org/en/stable/references/api/openapi.html --- This is the title (version number) and description (text). -COMMENT ON SCHEMA api_v1_1_0 IS -$$v1.1.0 - -A RESTful API that serves data from the SF-SAC.$$; - - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/api/api_v1_1_0/drop_schema.sql b/backend/dissemination/api/api_v1_1_0/drop_schema.sql deleted file mode 100644 index e32038ee46..0000000000 --- a/backend/dissemination/api/api_v1_1_0/drop_schema.sql +++ /dev/null @@ -1,11 +0,0 @@ - -begin; - -DROP SCHEMA IF EXISTS api_v1_1_0 CASCADE; --- DROP ROLE IF EXISTS authenticator; --- DROP ROLE IF EXISTS api_fac_gov; - -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_0/drop_views.sql b/backend/dissemination/api/api_v1_1_0/drop_views.sql deleted file mode 100644 index 877d5920c6..0000000000 --- a/backend/dissemination/api/api_v1_1_0/drop_views.sql +++ /dev/null @@ -1,17 +0,0 @@ -begin; - drop table if exists api_v1_1_0.metadata; - drop view if exists api_v1_1_0.general; - drop view if exists api_v1_1_0.auditor; - drop view if exists api_v1_1_0.federal_awards; - drop view if exists api_v1_1_0.findings; - drop view if exists api_v1_1_0.findings_text; - drop view if exists api_v1_1_0.corrective_action_plans; - drop view if exists api_v1_1_0.additional_ueis; - drop view if exists api_v1_1_0.notes_to_sefa; - drop view if exists api_v1_1_0.passthrough; - drop view if exists api_v1_1_0.secondary_auditors; - drop view if exists api_v1_1_0.additional_eins; -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_1/base.sql b/backend/dissemination/api/api_v1_1_1/base.sql deleted file mode 100644 index dedabe0cb7..0000000000 --- a/backend/dissemination/api/api_v1_1_1/base.sql +++ /dev/null @@ -1,29 +0,0 @@ -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'authenticator') THEN - RAISE NOTICE 'Role "authenticator" already exists. Skipping.'; - ELSE - CREATE ROLE authenticator LOGIN NOINHERIT NOCREATEDB NOCREATEROLE NOSUPERUSER; - END IF; -END -$do$; - -DO -$do$ -BEGIN - IF EXISTS ( - SELECT FROM pg_catalog.pg_roles - WHERE rolname = 'api_fac_gov') THEN - RAISE NOTICE 'Role "api_fac_gov" already exists. Skipping.'; - ELSE - CREATE ROLE api_fac_gov NOLOGIN; - END IF; -END -$do$; - -GRANT api_fac_gov TO authenticator; - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_1/create_functions.sql b/backend/dissemination/api/api_v1_1_1/create_functions.sql deleted file mode 100644 index 87114cd660..0000000000 --- a/backend/dissemination/api/api_v1_1_1/create_functions.sql +++ /dev/null @@ -1,106 +0,0 @@ --- WARNING --- Under PostgreSQL 12, the functions below work. --- Under PostgreSQL 14, these will break. 
--- --- Note the differences: --- --- raise info 'Works under PostgreSQL 12'; --- raise info 'request.header.x-magic %', (SELECT current_setting('request.header.x-magic', true)); --- raise info 'request.jwt.claim.expires %', (SELECT current_setting('request.jwt.claim.expires', true)); --- raise info 'Works under PostgreSQL 14'; --- raise info 'request.headers::json->>x-magic %', (SELECT current_setting('request.headers', true)::json->>'x-magic'); --- raise info 'request.jwt.claims::json->expires %', (SELECT current_setting('request.jwt.claims', true)::json->>'expires'); --- --- To quote the work of Dav Pilkey, "remember this now." - - -CREATE OR REPLACE FUNCTION api_v1_1_1_functions.get_header(item text) RETURNS text - AS $get_header$ - declare res text; - begin - SELECT (current_setting('request.headers', true)::json)->>item into res; - return res; - end; -$get_header$ LANGUAGE plpgsql; - -create or replace function api_v1_1_1_functions.get_api_key_uuid() returns TEXT -as $gaku$ -declare uuid text; -begin - select api_v1_1_1_functions.get_header('x-api-user-id') into uuid; - return uuid; -end; -$gaku$ LANGUAGE plpgsql; - -create or replace function api_v1_1_1_functions.has_tribal_data_access() -returns boolean -as $has_tribal_data_access$ -DECLARE - uuid_header UUID; - key_exists boolean; -BEGIN - - SELECT api_v1_1_1_functions.get_api_key_uuid() INTO uuid_header; - SELECT - CASE WHEN EXISTS ( - SELECT key_id - FROM public.dissemination_TribalApiAccessKeyIds taaki - WHERE taaki.key_id = uuid_header::TEXT) - THEN 1::BOOLEAN - ELSE 0::BOOLEAN - END - INTO key_exists; - RAISE INFO 'api_v1_1_1 has_tribal % %', uuid_header, key_exists; - RETURN key_exists; -END; -$has_tribal_data_access$ LANGUAGE plpgsql; - - -CREATE OR REPLACE FUNCTION api_v1_1_1.request_file_access( - report_id TEXT -) RETURNS JSON LANGUAGE plpgsql AS -$$ -DECLARE - v_uuid_header TEXT; - v_access_uuid VARCHAR(200); - v_key_exists BOOLEAN; - v_key_added_date DATE; -BEGIN - - SELECT api_v1_1_1_functions.get_api_key_uuid() INTO v_uuid_header; - - -- Check if the provided API key exists in public.dissemination_TribalApiAccessKeyIds - SELECT - EXISTS( - SELECT 1 - FROM public.dissemination_TribalApiAccessKeyIds - WHERE key_id = v_uuid_header - ) INTO v_key_exists; - - - -- Get the added date of the key from public.dissemination_TribalApiAccessKeyIds - SELECT date_added - INTO v_key_added_date - FROM public.dissemination_TribalApiAccessKeyIds - WHERE key_id = v_uuid_header; - - - -- Check if the key is less than 6 months old - IF v_uuid_header IS NOT NULL AND v_key_exists AND v_key_added_date >= CURRENT_DATE - INTERVAL '6 months' THEN - -- Generate UUID (using PostgreSQL's gen_random_uuid function) - SELECT gen_random_uuid() INTO v_access_uuid; - - -- Inserting data into the one_time_access table - INSERT INTO public.dissemination_onetimeaccess (uuid, api_key_id, timestamp, report_id) - VALUES (v_access_uuid::UUID, v_uuid_header, CURRENT_TIMESTAMP, report_id); - - -- Return the UUID to the user - RETURN json_build_object('access_uuid', v_access_uuid); - ELSE - -- Return an error for unauthorized access - RETURN json_build_object('error', 'Unauthorized access or key older than 6 months')::JSON; - END IF; -END; -$$; - -NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_1/create_schema.sql b/backend/dissemination/api/api_v1_1_1/create_schema.sql deleted file mode 100644 index 2b6a58d553..0000000000 --- a/backend/dissemination/api/api_v1_1_1/create_schema.sql +++ /dev/null @@ -1,48 +0,0 @@ -begin; - -do -$$ 
-begin - DROP SCHEMA IF EXISTS api_v1_1_1 CASCADE; - DROP SCHEMA IF EXISTS api_v1_1_1_functions CASCADE; - - if not exists (select schema_name from information_schema.schemata where schema_name = 'api_v1_1_1') then - create schema api_v1_1_1; - create schema api_v1_1_1_functions; - - grant usage on schema api_v1_1_1_functions to api_fac_gov; - - -- Grant access to tables and views - alter default privileges - in schema api_v1_1_1 - grant select - -- this includes views - on tables - to api_fac_gov; - - -- Grant access to sequences, if we have them - grant usage on schema api_v1_1_1 to api_fac_gov; - grant select, usage on all sequences in schema api_v1_1_1 to api_fac_gov; - alter default privileges - in schema api_v1_1_1 - grant select, usage - on sequences - to api_fac_gov; - end if; -end -$$ -; - --- https://postgrest.org/en/stable/references/api/openapi.html --- This is the title (version number) and description (text). -COMMENT ON SCHEMA api_v1_1_1 IS -$$v1.1.1 - -A RESTful API that serves data from the SF-SAC.$$; - - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/api/api_v1_1_1/create_views.sql b/backend/dissemination/api/api_v1_1_1/create_views.sql deleted file mode 100644 index 7fdb73e109..0000000000 --- a/backend/dissemination/api/api_v1_1_1/create_views.sql +++ /dev/null @@ -1,410 +0,0 @@ -begin; - ---------------------------------------- --- finding_text ---------------------------------------- -create view api_v1_1_1.findings_text as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - ft.finding_ref_number, - ft.contains_chart_or_table, - ft.finding_text - from - dissemination_findingtext ft, - dissemination_general gen - where - ft.report_id = gen.report_id - and - (gen.is_public = true - or (gen.is_public = false and api_v1_1_1_functions.has_tribal_data_access())) - order by ft.id -; - ---------------------------------------- --- additional_ueis ---------------------------------------- -create view api_v1_1_1.additional_ueis as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - uei.additional_uei - from - dissemination_general gen, - dissemination_additionaluei uei - where - gen.report_id = uei.report_id - order by uei.id -; - ---------------------------------------- --- finding ---------------------------------------- -create view api_v1_1_1.findings as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - finding.award_reference, - finding.reference_number, - finding.is_material_weakness, - finding.is_modified_opinion, - finding.is_other_findings, - finding.is_other_matters, - finding.prior_finding_ref_numbers, - finding.is_questioned_costs, - finding.is_repeat_finding, - finding.is_significant_deficiency, - finding.type_requirement - from - dissemination_finding finding, - dissemination_general gen - where - finding.report_id = gen.report_id - order by finding.id -; - ---------------------------------------- --- federal award ---------------------------------------- -create view api_v1_1_1.federal_awards as - select - award.report_id, - gen.auditee_uei, - gen.audit_year, - --- - award.award_reference, - award.federal_agency_prefix, - award.federal_award_extension, - award.additional_award_identification, - award.federal_program_name, - award.amount_expended, - award.cluster_name, - award.other_cluster_name, - award.state_cluster_name, - award.cluster_total, - award.federal_program_total, - award.is_major, - award.is_loan, - award.loan_balance, - award.is_direct, - award.audit_report_type, - 
award.findings_count, - award.is_passthrough_award, - award.passthrough_amount - from - dissemination_federalaward award, - dissemination_general gen - where - award.report_id = gen.report_id - order by award.id -; - - ---------------------------------------- --- corrective_action_plan ---------------------------------------- -create view api_v1_1_1.corrective_action_plans as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - ct.finding_ref_number, - ct.contains_chart_or_table, - ct.planned_action - from - dissemination_CAPText ct, - dissemination_General gen - where - ct.report_id = gen.report_id - and - (gen.is_public = true - or (gen.is_public = false and api_v1_1_1_functions.has_tribal_data_access())) - order by ct.id -; - ---------------------------------------- --- notes_to_sefa ---------------------------------------- -create view api_v1_1_1.notes_to_sefa as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - note.note_title as title, - note.accounting_policies, - note.is_minimis_rate_used, - note.rate_explained, - note.content, - note.contains_chart_or_table - from - dissemination_general gen, - dissemination_note note - where - note.report_id = gen.report_id - and - (gen.is_public = true - or (gen.is_public = false and api_v1_1_1_functions.has_tribal_data_access())) - order by note.id -; - ---------------------------------------- --- passthrough ---------------------------------------- -create view api_v1_1_1.passthrough as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - pass.award_reference, - pass.passthrough_id, - pass.passthrough_name - from - dissemination_general as gen, - dissemination_passthrough as pass - where - gen.report_id = pass.report_id - order by pass.id -; - - ---------------------------------------- --- general ---------------------------------------- -create view api_v1_1_1.general as - select - -- every table starts with report_id, UEI, and year - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - gen.auditee_certify_name, - gen.auditee_certify_title, - gen.auditee_contact_name, - gen.auditee_email, - gen.auditee_name, - gen.auditee_phone, - gen.auditee_contact_title, - gen.auditee_address_line_1, - gen.auditee_city, - gen.auditee_state, - gen.auditee_ein, - gen.auditee_zip, - -- auditor - gen.auditor_certify_name, - gen.auditor_certify_title, - gen.auditor_phone, - gen.auditor_state, - gen.auditor_city, - gen.auditor_contact_title, - gen.auditor_address_line_1, - gen.auditor_zip, - gen.auditor_country, - gen.auditor_contact_name, - gen.auditor_email, - gen.auditor_firm_name, - gen.auditor_foreign_address, - gen.auditor_ein, - -- agency - gen.cognizant_agency, - gen.oversight_agency, - -- dates - gen.date_created, - gen.ready_for_certification_date, - gen.auditor_certified_date, - gen.auditee_certified_date, - gen.submitted_date, - gen.fac_accepted_date, - gen.fy_end_date, - gen.fy_start_date, - gen.audit_type, - gen.gaap_results, - gen.sp_framework_basis, - gen.is_sp_framework_required, - gen.sp_framework_opinions, - gen.is_going_concern_included, - gen.is_internal_control_deficiency_disclosed, - gen.is_internal_control_material_weakness_disclosed, - gen.is_material_noncompliance_disclosed, - gen.dollar_threshold, - gen.is_low_risk_auditee, - gen.agencies_with_prior_findings, - gen.entity_type, - gen.number_months, - gen.audit_period_covered, - gen.total_amount_expended, - gen.type_audit_code, - gen.is_public, - gen.data_source, - gen.is_aicpa_audit_guide_included, - 
gen.is_additional_ueis, - CASE EXISTS(SELECT ein.report_id FROM dissemination_additionalein ein WHERE ein.report_id = gen.report_id) - WHEN FALSE THEN 'No' - ELSE 'Yes' - END AS is_multiple_eins, - CASE EXISTS(SELECT aud.report_id FROM dissemination_secondaryauditor aud WHERE aud.report_id = gen.report_id) - WHEN FALSE THEN 'No' - ELSE 'Yes' - END AS is_secondary_auditors - from - dissemination_general gen - order by gen.id -; - ---------------------------------------- --- auditor (secondary auditor) ---------------------------------------- -create view api_v1_1_1.secondary_auditors as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - sa.auditor_ein, - sa.auditor_name, - sa.contact_name, - sa.contact_title, - sa.contact_email, - sa.contact_phone, - sa.address_street, - sa.address_city, - sa.address_state, - sa.address_zipcode - from - dissemination_General gen, - dissemination_SecondaryAuditor sa - where - sa.report_id = gen.report_id - order by sa.id -; - -create view api_v1_1_1.additional_eins as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - ein.additional_ein - from - dissemination_general gen, - dissemination_additionalein ein - where - gen.report_id = ein.report_id - order by ein.id -; - --- Specify every field in dissemination_combined, omitting the id. --- Generated fields like ALN are done in the creation of the table, not here. -create view api_v1_1_1.combined as - select - combined.report_id, - combined.award_reference, - combined.reference_number, - combined.aln, - combined.agencies_with_prior_findings, - combined.audit_period_covered, - combined.audit_type, - combined.audit_year, - combined.auditee_address_line_1, - combined.auditee_certified_date, - combined.auditee_certify_name, - combined.auditee_certify_title, - combined.auditee_city, - combined.auditee_contact_name, - combined.auditee_contact_title, - combined.auditee_ein, - combined.auditee_email, - combined.auditee_name, - combined.auditee_phone, - combined.auditee_state, - combined.auditee_uei, - combined.auditee_zip, - combined.auditor_address_line_1, - combined.auditor_certified_date, - combined.auditor_certify_name, - combined.auditor_certify_title, - combined.auditor_city, - combined.auditor_contact_name, - combined.auditor_contact_title, - combined.auditor_country, - combined.auditor_ein, - combined.auditor_email, - combined.auditor_firm_name, - combined.auditor_foreign_address, - combined.auditor_phone, - combined.auditor_state, - combined.auditor_zip, - combined.cognizant_agency, - combined.data_source, - combined.date_created, - combined.dollar_threshold, - combined.entity_type, - combined.fac_accepted_date, - combined.fy_end_date, - combined.fy_start_date, - combined.gaap_results, - combined.is_additional_ueis, - combined.is_aicpa_audit_guide_included, - combined.is_going_concern_included, - combined.is_internal_control_deficiency_disclosed, - combined.is_internal_control_material_weakness_disclosed, - combined.is_low_risk_auditee, - combined.is_material_noncompliance_disclosed, - combined.is_public, - combined.is_sp_framework_required, - combined.number_months, - combined.oversight_agency, - combined.ready_for_certification_date, - combined.sp_framework_basis, - combined.sp_framework_opinions, - combined.submitted_date, - combined.total_amount_expended, - combined.type_audit_code, - combined.additional_award_identification, - combined.amount_expended, - combined.cluster_name, - combined.cluster_total, - combined.federal_agency_prefix, - 
combined.federal_award_extension, - combined.federal_program_name, - combined.federal_program_total, - combined.findings_count, - combined.is_direct, - combined.is_loan, - combined.is_major, - combined.is_passthrough_award, - combined.loan_balance, - combined.audit_report_type, - combined.other_cluster_name, - combined.passthrough_amount, - combined.state_cluster_name, - combined.is_material_weakness, - combined.is_modified_opinion, - combined.is_other_findings, - combined.is_other_matters, - combined.is_questioned_costs, - combined.is_repeat_finding, - combined.is_significant_deficiency, - combined.prior_finding_ref_numbers, - combined.type_requirement, - combined.passthrough_name, - combined.passthrough_id - from - dissemination_combined combined - where - (combined.is_public = true - or (combined.is_public = false and api_v1_1_1_functions.has_tribal_data_access())) - order by combined.id -; - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/api/api_v1_1_1/drop_schema.sql b/backend/dissemination/api/api_v1_1_1/drop_schema.sql deleted file mode 100644 index 705153e878..0000000000 --- a/backend/dissemination/api/api_v1_1_1/drop_schema.sql +++ /dev/null @@ -1,11 +0,0 @@ - -begin; - -DROP SCHEMA IF EXISTS api_v1_1_1 CASCADE; --- DROP ROLE IF EXISTS authenticator; --- DROP ROLE IF EXISTS api_fac_gov; - -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/api/api_v1_1_1/drop_views.sql b/backend/dissemination/api/api_v1_1_1/drop_views.sql deleted file mode 100644 index c95c0fc139..0000000000 --- a/backend/dissemination/api/api_v1_1_1/drop_views.sql +++ /dev/null @@ -1,18 +0,0 @@ -begin; - drop table if exists api_v1_1_1.metadata; - drop view if exists api_v1_1_1.general; - drop view if exists api_v1_1_1.auditor; - drop view if exists api_v1_1_1.federal_awards; - drop view if exists api_v1_1_1.findings; - drop view if exists api_v1_1_1.findings_text; - drop view if exists api_v1_1_1.corrective_action_plans; - drop view if exists api_v1_1_1.additional_ueis; - drop view if exists api_v1_1_1.notes_to_sefa; - drop view if exists api_v1_1_1.passthrough; - drop view if exists api_v1_1_1.secondary_auditors; - drop view if exists api_v1_1_1.additional_eins; - drop view if exists api_v1_1_1.combined; -commit; - -notify pgrst, - 'reload schema'; diff --git a/backend/dissemination/management/commands/generate_dump_files.py b/backend/dissemination/management/commands/generate_dump_files.py new file mode 100644 index 0000000000..78a413ed81 --- /dev/null +++ b/backend/dissemination/management/commands/generate_dump_files.py @@ -0,0 +1,112 @@ +import json +from collections import namedtuple as NT + +from django.core.management.base import BaseCommand +from dissemination.models import ( + General, + FederalAward, + Finding, + FindingText, + AdditionalUei, + AdditionalEin, + CapText, + Note, + Passthrough, + SecondaryAuditor, +) + +from django.forms import model_to_dict +from django.core.serializers.json import DjangoJSONEncoder +from django.db.models import Model + +Table = NT("Table", "model,name,is_public_table") +# Exclude General here. 
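+# (General is dumped separately by dump_general(), which also collects the public and
+# private report_id lists that handle() uses to decide which rows each table dump includes.)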
+TABLES_TO_DUMP = [ + # Public tables + Table(FederalAward, "federal_awards", True), + Table(Finding, "findings", True), + Table(Passthrough, "passthroughs", True), + Table(AdditionalUei, "additional_ueis", True), + Table(AdditionalEin, "additional_eins", True), + Table(SecondaryAuditor, "secondary_auditors", True), + # Suppressed tables + Table(FindingText, "findings_text", False), + Table(CapText, "corrective_action_plan_text", False), + Table(Note, "notes_to_sefa", False), +] + + +# https://stackoverflow.com/questions/757022/how-do-you-serialize-a-model-instance-in-django +class ExtendedEncoder(DjangoJSONEncoder): + def default(self, o): + if isinstance(o, Model): + d = model_to_dict(o) + if "id" in d: + del d["id"] + return d + return super().default(o) + + +def dump_general(audit_year): + public_report_ids = [] + private_report_ids = [] + objs = General.objects.filter(audit_year=audit_year) + with open(f"{audit_year}-general.json", "w") as fp: + fp.write("[") + first = True + for o in objs: + if o.is_public: + public_report_ids.append(o.report_id) + else: + private_report_ids.append(o.report_id) + if first: + fp.write("\n") + first = False + else: + fp.write(",\n") + fp.write("\t") + fp.write(json.dumps(o, cls=ExtendedEncoder)) + fp.write("\n]\n") + fp.close() + return (public_report_ids, private_report_ids) + + +def dump_table(table, audit_year, report_ids): + with open(f"{audit_year}-{table.name}.json", "w") as fp: + fp.write("[") + first = True + for rid in report_ids: + objs = table.model.objects.filter(report_id=rid) + for o in objs: + if first: + fp.write("\n") + first = False + else: + fp.write(",\n") + fp.write("\t") + fp.write(json.dumps(o, cls=ExtendedEncoder)) + fp.write("\n]\n") + fp.close() + + +class Command(BaseCommand): + help = """ + Runs sql scripts to recreate access tables for the postgrest API. + """ + + def add_arguments(self, parser): + parser.add_argument( + "-y", "--year", choices=[f"20{x}" for x in range(16, 24)], default=False + ) + + def handle(self, *args, **options): + audit_year = options["year"] + (public_report_ids, private_report_ids) = dump_general(audit_year) + for table in TABLES_TO_DUMP: + # If it is a public table, dump everything. 
+ # If it is not a public table, then we only dump + # the report IDs that were marked is_public=True + if table.is_public_table: + dump_table(table, audit_year, public_report_ids + private_report_ids) + else: + dump_table(table, audit_year, public_report_ids) diff --git a/backend/dissemination/migrations/0020_alter_disseminationcombined_table.py b/backend/dissemination/migrations/0020_alter_disseminationcombined_table.py new file mode 100644 index 0000000000..0502e03bc4 --- /dev/null +++ b/backend/dissemination/migrations/0020_alter_disseminationcombined_table.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.2 on 2024-10-28 21:27 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("dissemination", "0019_delete_sacvalidationwaiver"), + ] + + operations = [ + migrations.AlterModelTable( + name="disseminationcombined", + table='public_data_v1_0_0"."combined', + ), + ] diff --git a/backend/dissemination/models.py b/backend/dissemination/models.py index 0abf7bee30..43a8391b17 100644 --- a/backend/dissemination/models.py +++ b/backend/dissemination/models.py @@ -627,7 +627,7 @@ class DisseminationCombined(models.Model): # Meta options class Meta: managed = False - db_table = "dissemination_combined" + db_table = 'public_data_v1_0_0"."combined' # General Information report_id = models.TextField( @@ -970,6 +970,9 @@ class Meta: ) + +DisseminationCombined.objects = DisseminationCombined.objects.using("fac-snapshot-db") + + class InvalidAuditRecord(models.Model): """Model holds records that have been migrated as is, without validation or changes.""" diff --git a/backend/dissemination/sql/SQL_README.md b/backend/dissemination/sql/SQL_README.md new file mode 100644 index 0000000000..863d2d523b --- /dev/null +++ b/backend/dissemination/sql/SQL_README.md @@ -0,0 +1,176 @@ +# sql folder + +If you are reading this, it is assumed you are a FAC developer who is about to touch the SQL in this folder. + +This document describes + +1. The database layout of the FAC +2. What is in this folder +3. How and when it executes +4. Things you should do +5. Things to watch out for + +## the database layout of the FAC + +*This is high-level background for reference.* + +The FAC has two databases. + +**DB1** is `fac-db`. The app talks to this database for all live operations. + +When a user updates their submission, they are updating a `singleauditchecklist` record in DB1. When a user does a *basic* search, they are searching `dissemination_general` in DB1. And, finally, when you update user roles in `/admin`, you are editing a table in DB1. + +**DB2** is `fac-snapshot-db`. It began life as a place to do a database snapshot before deploy. It still serves this purpose. However, we are now using it as a place to build a *data pipeline* that is implemented entirely as a sequence of actions in SQL. In this regard, it becomes a *read replica* of sorts where we can serve both *advanced search* and the API. + +**DB2 updates nightly.** The tables described below are *completely* destroyed and rebuilt every night. No data is persisted: DB2 serves as a *stateless data pipeline*. + +## what is in this folder + +The SQL folder contains one folder for each database: `fac-db` and `fac-snapshot-db`. These names align with the "friendly names" of our database services in our system configuration. + +Inside of each folder are two sub-folders: `pre` and `post`. + +1. `pre` contains SQL we run against the databases *before* migrations. +2.
`post` contains SQL we run against the databases *after* migrations. + +In the case of `fac-db` (DB1), we run all of the scripts in the `pre` folder when we deploy, we run migrations, and then we run everything in the `post` folder. This is consistent with what took place previously. + +In the case of `fac-snapshot-db` (DB2), it is slightly different. We run everything in the `pre` folder, and then we run everything in the `post` folder. There are no migrations in DB2, because it is a stateless copy of DB1. + +## pre/post + +The `pre` and `post` folders contain SQL files in execution order. That means that the ordering of the files matters. + +If the following files are in the `pre` folder: + +1. `000_first.sql` +2. `010_nope.SKIP` +3. `020_second.sql` + +then they will execute in the lexicographical order as shown. *However*, only files ending in `.sql` will be executed. This means that `000_first.sql` will be executed, `010_nope.SKIP` will be skipped, and `020_second.sql` will be run second. (Although it encourages a dirty tree, we *might* want to keep a file in the tree, but not have it execute.) + +### what happens on DB1 (fac-db) + +On DB1, we remove old schemas and tables (if they exist). If they don't exist, we essentially do nothing. + +#### pre + +1. Drop the API schemas. +2. Initialize audit curation code. + +The first step is because we will no longer serve the API from DB1. Therefore, all of the API schemas can go away. + +The second is because we now have SQL triggers to support *data curation*. These triggers are defined here. Finally, we *disable* audit curation as a "just-in-case" measure. Because the tracking flag lives in the DB, the app could crash while curation is enabled, and we would then be recording *every* change to the SAC table. This would be *bad*. So, we do a "disable" as part of startup. + +#### post + +We tear out the old, old, OLD, Census data (used for the cog/over work in early days). + +In the case of DB1, all of the actions could *probably* be `pre` actions. It does not particularly matter. + +### what happens on DB2 (fac-snapshot-db) + +Every night, on DB2, we first back up DB1. Then, we tear down our data pipeline and API, and rebuild it all from the backup we just made. This means that the data pipeline---including the backup---is essentially stateless. + +#### pre + +1. Set up roles (for PostgREST). Without these, PostgREST cannot authenticate/operate. +2. Tear down *all* schemas associated with the data pipeline. +3. Tear down and rebuild sequences used in constructing the new `public_data` tables. + +#### post + +##### Copy the `dissemination_*` tables to a `dissem_copy` schema. + +We do this because the API is going to attach to `dissem_copy.dissemination_*` tables. We do this instead of using `public.dissemination_*` for the simple reason that those tables are overwritten with each deploy. If we attached the API `VIEW`s to the `public` tables, it would interrupt/disrupt/break the pre-deploy backups. So, the first thing we do is make a copy. + +##### Create `public_data` tables. + +These tables are a copy of the `dissem_copy` tables, with some changes. + +1. We create a `combined` table that does a 4x `JOIN` across `general`, `federal_awards`, `passthrough`, and `findings`. This is all 100% public data. (It was previously our `MATERIALIZED VIEW`.) +2. We apply a `general.is_public=true` filter to all tables containing suppressed data, therefore guaranteeing that `notes_to_sefa`, `corrective_action_plans`, and `finding_text` (for example) contain *only* public data. +3.
Sequence numbers and a `batch_number` column are added to every table; the batch number is indexed for fast downloading of bulk data. + +This is the "data pipeline." It is copying and modifying data to put it in the "right" shape for our API. This way, our API becomes a simple `SELECT *` in a `VIEW`. + +As new data needs are discovered, it is assumed that the `post` operations on DB2 will implement additional copies/table creations/etc. to extend our data pipeline in order to address customer/user needs. + +##### Create `suppressed_data` tables. + +These are "the same" as the above, but they are filtered to contain only suppressed/Tribal data. + +These tables are only accessible via API if you have gone through our Tribal API attestation/access process. Only Federal agencies are able to obtain API access to this data in order to support their oversight operations. Non-privileged keys will find empty result sets (`[]`) if they attempt to query these tables. + +##### Create `metadata` table. + +We create a `metadata` table containing row counts for all of the tables created above. + +It is also exposed via `api_v2_0_0`. This allows users to quickly find 1) which tables are present, and 2) how much data is in those tables. This meets customer needs in an important way: when they are downloading data, they want to know "did I get everything?" This lets them do a bulk download via API and then answer that question in a programmatic manner. + +It also serves as a demonstration for one kind of data manipulation that can be used to create new tables and, therefore, new functionality for users via the API. + +##### Create `api_v1_1_0`. + +This is the same code as previously existed, flaws and all. It points at `dissem_copy` tables, because they are 1:1 with what used to be in DB1. Hence, it "just works" "as-was." + +A good refactoring would be to point these views at the `public_data` tables instead. The views would no longer require `JOIN` statements, and access control could be handled more gracefully. + +##### Create `api_v2_0_0`. + +This points at the `public_data` and `suppressed_data` tables. + +##### Set up permissions + +All of the API access permissions are set in one place after the tables/views are created. + +##### Bring up the API + +We issue a `NOTIFY` to PostgREST, which tells it to re-read the schemas and provide an API. + +##### Indexing + +Now, we index *everything*. If something is not performant, *add more indexes*. + + +## possible improvements + +These are largely ticketed. However, there is a Python script floating around to generate `PARTITION` tables. (It is very repetitive code, hence having a script to spit out the SQL makes sense. It is assumed it would be run once, manually, and the output added to the sequence.) However, this will multiply the number of tables we have by roughly `N`x, where `N` is the number of partitions on each table. Performance testing suggests there is some improvement, but we should see significant improvements with `api_v2_0_0` that are "good enough" for a start without adding the complexity of `PARTITION`s at this point. + +## `sling` + +[sling](https://slingdata.io/) is a very cool tool. It takes a YAML config, and can copy data: + +* From a DB to a different DB +* DB to CSV (locally or direct to an S3 bucket) +* DB to JSON (same) +* ... + +basically, it "slings data around." + +The `sling` folder contains an example that, if wired into the nightly sequence, will generate compressed CSVs of all of the public data.
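+
+As a sketch only (the connection names, paths, and option keys below are assumptions, not the real config; the auto-generated YAML in the `sling` folder and the sling docs are authoritative), the replication config for this job is roughly shaped like:
+
+```
+# Hypothetical sling connection names, resolved from env/connection settings.
+source: FAC_SNAPSHOT_DB
+target: FAC_PRIVATE_S3
+
+defaults:
+  mode: full-refresh
+  # {stream_table} is a sling runtime variable; one output object per table.
+  object: 'bulk_export/{stream_table}.csv'
+
+streams:
+  public_data_v1_0_0.general:
+  public_data_v1_0_0.federal_awards:
+  # ...one stream per public_data table; the generated config also splits by year.
+```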
+ +In rough pseudocode: + +``` +for each year in the range 2016 - 2030 + for each table in the `public_data` space + sling a CSV to S3 called `<year>-<table>.csv.zip` +``` + +The sling YAML config is auto-generated from a short (manual) Python script. Why? Because I wanted to loop over the years, and wanted to loop over the table names. Also, you can't have more than 1M rows in a CSV, or Excel will choke when you try and open it. Hence, our tables *must* be split, somehow, for them to be useful to our customers. + +This is 95% of the way to providing downloads of bulk data. If we wire it into the nightly data pipeline (as a last step), it will copy the `public_data` tables out to `/bulk_export/...` in our `private` S3 bucket, and the Django app can provide a page/links for authenticated users to grab those files (single-use URLs). This will 1) slow users down from getting them *all the time*, and 2) give us better logging/tracking of who is accessing the bulk data. + +# running tests + +``` +pytest -s --env local test_api.py +``` + +where the env can be `local`, `preview`, `dev`, `staging`, or `production` to run queries against the API in any of those environments. + +* When running against local, `CYPRESS_API_GOV_JWT` needs to be set in your environment for this to work. And, it needs to match the `Authorization: Bearer` value in the Docker stack. +* `FAC_API_KEY` needs to be set when testing against the production environments. + +These tests are not (yet) integrated with any repeatable framework. They were developed as a way to quickly be confident that access controls were being implemented correctly. It is a ticket to integrate API testing. diff --git a/backend/dissemination/sql/conftest.py b/backend/dissemination/sql/conftest.py new file mode 100644 index 0000000000..baf4dcd4e0 --- /dev/null +++ b/backend/dissemination/sql/conftest.py @@ -0,0 +1,10 @@ +def pytest_addoption(parser): + parser.addoption("--env", action="store", default="local") + + +def pytest_generate_tests(metafunc): + # This is called for every test. Only get/set command line arguments + # if the argument is specified in the list of test "fixturenames". + option_value = metafunc.config.option.env + if "env" in metafunc.fixturenames and option_value is not None: + metafunc.parametrize("env", [option_value]) diff --git a/backend/dissemination/sql/drop_materialized_views.sql b/backend/dissemination/sql/drop_materialized_views.sql deleted file mode 100644 index 83c1e16867..0000000000 --- a/backend/dissemination/sql/drop_materialized_views.sql +++ /dev/null @@ -1,7 +0,0 @@ -DROP MATERIALIZED VIEW IF EXISTS dissemination_combined; - -DROP SEQUENCE IF EXISTS dissemination_combined_id_seq; - -DROP MATERIALIZED VIEW IF EXISTS census_gsa_crosswalk; - -DROP SEQUENCE IF EXISTS census_gsa_crosswalk_id_seq; \ No newline at end of file diff --git a/backend/dissemination/sql/fac-db/post/010_remove_old_census_data.sql b/backend/dissemination/sql/fac-db/post/010_remove_old_census_data.sql new file mode 100644 index 0000000000..8db01324df --- /dev/null +++ b/backend/dissemination/sql/fac-db/post/010_remove_old_census_data.sql @@ -0,0 +1,92 @@ + +-- OLD RAW CENSUS DATA +-- This data was an early part of the data migration. +-- Specifically, it was used in cog/over. However, we now use +-- our own data for this. We have removed this from the deploy, +-- but the tables are still in prod. This will remove them +-- from the prod environment, where they are no longer needed.
+-- The cascade will get rid of any associated artifacts, which +-- we certainly do not want. +-------------------------------------- +-------------------------------------- +DROP TABLE IF EXISTS census_agency16 CASCADE; +DROP TABLE IF EXISTS census_agency17 CASCADE; +DROP TABLE IF EXISTS census_agency18 CASCADE; +DROP TABLE IF EXISTS census_agency19 CASCADE; +DROP TABLE IF EXISTS census_agency20 CASCADE; +DROP TABLE IF EXISTS census_agency21 CASCADE; +DROP TABLE IF EXISTS census_agency22 CASCADE; +DROP TABLE IF EXISTS census_captext_formatted19 CASCADE; +DROP TABLE IF EXISTS census_captext_formatted20 CASCADE; +DROP TABLE IF EXISTS census_captext_formatted21 CASCADE; +DROP TABLE IF EXISTS census_captext_formatted22 CASCADE; +DROP TABLE IF EXISTS census_captext19 CASCADE; +DROP TABLE IF EXISTS census_captext20 CASCADE; +DROP TABLE IF EXISTS census_captext21 CASCADE; +DROP TABLE IF EXISTS census_captext22 CASCADE; +DROP TABLE IF EXISTS census_cfda16 CASCADE; +DROP TABLE IF EXISTS census_cfda17 CASCADE; +DROP TABLE IF EXISTS census_cfda18 CASCADE; +DROP TABLE IF EXISTS census_cfda19 CASCADE; +DROP TABLE IF EXISTS census_cfda20 CASCADE; +DROP TABLE IF EXISTS census_cfda21 CASCADE; +DROP TABLE IF EXISTS census_cfda22 CASCADE; +DROP TABLE IF EXISTS census_cpas16 CASCADE; +DROP TABLE IF EXISTS census_cpas17 CASCADE; +DROP TABLE IF EXISTS census_cpas18 CASCADE; +DROP TABLE IF EXISTS census_cpas19 CASCADE; +DROP TABLE IF EXISTS census_cpas20 CASCADE; +DROP TABLE IF EXISTS census_cpas21 CASCADE; +DROP TABLE IF EXISTS census_cpas22 CASCADE; +DROP TABLE IF EXISTS census_duns16 CASCADE; +DROP TABLE IF EXISTS census_duns17 CASCADE; +DROP TABLE IF EXISTS census_duns18 CASCADE; +DROP TABLE IF EXISTS census_duns19 CASCADE; +DROP TABLE IF EXISTS census_duns20 CASCADE; +DROP TABLE IF EXISTS census_duns21 CASCADE; +DROP TABLE IF EXISTS census_duns22 CASCADE; +DROP TABLE IF EXISTS census_eins16 CASCADE; +DROP TABLE IF EXISTS census_eins17 CASCADE; +DROP TABLE IF EXISTS census_eins18 CASCADE; +DROP TABLE IF EXISTS census_eins19 CASCADE; +DROP TABLE IF EXISTS census_eins20 CASCADE; +DROP TABLE IF EXISTS census_eins21 CASCADE; +DROP TABLE IF EXISTS census_eins22 CASCADE; +DROP TABLE IF EXISTS census_findings16 CASCADE; +DROP TABLE IF EXISTS census_findings17 CASCADE; +DROP TABLE IF EXISTS census_findings18 CASCADE; +DROP TABLE IF EXISTS census_findings19 CASCADE; +DROP TABLE IF EXISTS census_findings20 CASCADE; +DROP TABLE IF EXISTS census_findings21 CASCADE; +DROP TABLE IF EXISTS census_findings22 CASCADE; +DROP TABLE IF EXISTS census_findingstext_formatted19 CASCADE; +DROP TABLE IF EXISTS census_findingstext_formatted20 CASCADE; +DROP TABLE IF EXISTS census_findingstext_formatted21 CASCADE; +DROP TABLE IF EXISTS census_findingstext_formatted22 CASCADE; +DROP TABLE IF EXISTS census_findingstext19 CASCADE; +DROP TABLE IF EXISTS census_findingstext20 CASCADE; +DROP TABLE IF EXISTS census_findingstext21 CASCADE; +DROP TABLE IF EXISTS census_findingstext22 CASCADE; +DROP TABLE IF EXISTS census_gen16 CASCADE; +DROP TABLE IF EXISTS census_gen17 CASCADE; +DROP TABLE IF EXISTS census_gen18 CASCADE; +DROP TABLE IF EXISTS census_gen19 CASCADE; +DROP TABLE IF EXISTS census_gen20 CASCADE; +DROP TABLE IF EXISTS census_gen21 CASCADE; +DROP TABLE IF EXISTS census_gen22 CASCADE; +DROP TABLE IF EXISTS census_notes19 CASCADE; +DROP TABLE IF EXISTS census_notes20 CASCADE; +DROP TABLE IF EXISTS census_notes21 CASCADE; +DROP TABLE IF EXISTS census_notes22 CASCADE; +DROP TABLE IF EXISTS census_passthrough16 CASCADE; +DROP TABLE IF 
EXISTS census_passthrough17 CASCADE; +DROP TABLE IF EXISTS census_passthrough18 CASCADE; +DROP TABLE IF EXISTS census_passthrough19 CASCADE; +DROP TABLE IF EXISTS census_passthrough20 CASCADE; +DROP TABLE IF EXISTS census_passthrough21 CASCADE; +DROP TABLE IF EXISTS census_passthrough22 CASCADE; +DROP TABLE IF EXISTS census_revisions19 CASCADE; +DROP TABLE IF EXISTS census_revisions20 CASCADE; +DROP TABLE IF EXISTS census_revisions21 CASCADE; +DROP TABLE IF EXISTS census_revisions22 CASCADE; +DROP TABLE IF EXISTS census_ueis22 CASCADE; diff --git a/backend/dissemination/sql/fac-db/post/999_finalize.sql b/backend/dissemination/sql/fac-db/post/999_finalize.sql new file mode 100644 index 0000000000..6068a8af5b --- /dev/null +++ b/backend/dissemination/sql/fac-db/post/999_finalize.sql @@ -0,0 +1,2 @@ +-- Currently, there are no finalization actions for fac-db. +-- They would go here. diff --git a/backend/dissemination/sql/fac-db/pre/010_drop_schemas.sql b/backend/dissemination/sql/fac-db/pre/010_drop_schemas.sql new file mode 100644 index 0000000000..ac2a7d582e --- /dev/null +++ b/backend/dissemination/sql/fac-db/pre/010_drop_schemas.sql @@ -0,0 +1,9 @@ +-- On fac-db, we can drop all the schemas that we're no longer +-- serving off of this database. +-- It is a kind of "spring cleaning," and it is OK if it continues to +-- run for the foreseeable future. +DROP SCHEMA IF EXISTS api_v1_0_3 CASCADE; +DROP SCHEMA IF EXISTS api_v1_1_0 CASCADE; +DROP SCHEMA IF EXISTS api_v1_1_1 CASCADE; +DROP SCHEMA IF EXISTS admin_api_v1_1_0 CASCADE; +DROP SCHEMA IF EXISTS admin_api_v1_1_1 CASCADE; diff --git a/backend/curation/sql/init_curation_auditing.sql b/backend/dissemination/sql/fac-db/pre/020_init_curation_auditing.sql similarity index 100% rename from backend/curation/sql/init_curation_auditing.sql rename to backend/dissemination/sql/fac-db/pre/020_init_curation_auditing.sql diff --git a/backend/dissemination/sql/fac-db/pre/030_disable_audit_curation.sql b/backend/dissemination/sql/fac-db/pre/030_disable_audit_curation.sql new file mode 100644 index 0000000000..828555c22a --- /dev/null +++ b/backend/dissemination/sql/fac-db/pre/030_disable_audit_curation.sql @@ -0,0 +1,13 @@ +----------------------- +-- AUDIT CURATION +-- This disables curation tracking on the below tables. +-- We do this at startup *just in case* the app crashed while we were +-- doing data curation. If that were true, the DB would be recording +-- EVERY change to these two tables. Given that the `singleauditchecklist` +-- table is hit *constantly*, this would be bad. +-- +-- Therefore, one of the first things we do every time we start up is +-- make sure that this state is disabled in the database. + +select curation.disable_tracking('public.audit_singleauditchecklist'::regclass); +select curation.disable_tracking('public.support_cognizantassignment'::regclass); diff --git a/backend/dissemination/sql/fac-snapshot-db/create_partition_statements.py b/backend/dissemination/sql/fac-snapshot-db/create_partition_statements.py new file mode 100644 index 0000000000..94f01a146e --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/create_partition_statements.py @@ -0,0 +1,112 @@ +import argparse +import sys + +# for i in general federal_awards combined ; do python create_partition_statements.py $i 20 ; done + +# rm post/001_partitioning.sql ; for i in general federal_awards combined ; do python create_partition_statements.py $i >> post/001_partitioning.sql ; done + + +#################################### +# by hash (makes no sense?)
+#################################### +def partition_by_sequence_hash(): + + parser = argparse.ArgumentParser("simple_example") + parser.add_argument( + "table_name", help="Table to create partition statements for", type=str + ) + parser.add_argument("number_of_partitions", help="number of partitions", type=int) + args = parser.parse_args() + table_name = args.table_name + number_of_partitions = args.number_of_partitions + + print( + f""" +------------------------------------------------------ +-- {table_name} - {number_of_partitions} partitions (hash seq) +------------------------------------------------------ +""" + ) + print("SET search_path TO public_data_v1_0_0;") + + print( + f"ALTER TABLE public_data_v1_0_0.{table_name} RENAME TO {table_name}_to_be_removed;" + ) + print( + f"""CREATE TABLE public_data_v1_0_0.{table_name} + (LIKE public_data_v1_0_0.{table_name}_to_be_removed) + PARTITION BY hash(seq); + """ + ) + + for ndx in range(number_of_partitions): + print( + f""" + DROP TABLE IF EXISTS public_data_v1_0_0.part_{table_name}_{ndx:02}; + CREATE TABLE public_data_v1_0_0.part_{table_name}_{ndx:02} + PARTITION OF public_data_v1_0_0.{table_name} + FOR VALUES WITH (modulus {number_of_partitions}, remainder {ndx}); + """ + ) + print( + f""" + INSERT INTO public_data_v1_0_0.{table_name} + SELECT * FROM public_data_v1_0_0.{table_name}_to_be_removed; + """ + ) + print(f"DROP TABLE public_data_v1_0_0.{table_name}_to_be_removed;") + + +#################################### +# by audit year +#################################### +def partition_by_audit_year(): + + parser = argparse.ArgumentParser("simple_example") + parser.add_argument( + "table_name", help="Table to create partition statements for", type=str + ) + args = parser.parse_args() + table_name = args.table_name + + print( + f""" +------------------------------------------------------ +-- {table_name} - partitions (audit_year) +------------------------------------------------------ +-- python {' '.join(sys.argv)} +""" + ) + print("SET search_path TO public_data_v1_0_0;") + + print( + f"ALTER TABLE public_data_v1_0_0.{table_name} RENAME TO {table_name}_to_be_removed;" + ) + print( + f"""CREATE TABLE public_data_v1_0_0.{table_name} + (LIKE public_data_v1_0_0.{table_name}_to_be_removed) + PARTITION BY list(audit_year); + """ + ) + + for ndx in range(16, 30): + print( + f""" + DROP TABLE IF EXISTS public_data_v1_0_0.part_{table_name}_20{ndx:02}; + CREATE TABLE public_data_v1_0_0.part_{table_name}_20{ndx:02} + PARTITION OF public_data_v1_0_0.{table_name} + FOR VALUES IN ('20{ndx:02}'); + """ + ) + print( + f""" + INSERT INTO public_data_v1_0_0.{table_name} + SELECT * FROM public_data_v1_0_0.{table_name}_to_be_removed; + """ + ) + print(f"DROP TABLE public_data_v1_0_0.{table_name}_to_be_removed;") + + +if __name__ == "__main__": + # partition_by_sequence_hash() + partition_by_audit_year() diff --git a/backend/dissemination/sql/fac-snapshot-db/post/001_partitioning.SKIP b/backend/dissemination/sql/fac-snapshot-db/post/001_partitioning.SKIP new file mode 100644 index 0000000000..939c45fe1f --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/001_partitioning.SKIP @@ -0,0 +1,310 @@ + +------------------------------------------------------ +-- general - partitions (audit_year) +------------------------------------------------------ +-- python create_partition_statements.py general + +-- 20241016 SKIPPED +-- This code demonstrates how to partition the tables that are slung over for +-- use in the API. 
We do this *post* because it happens after migration, but +-- before we sling data in. +-- This represents a large change, and is not strictly needed right now. +-- But, it serves as a good example of how to do it. + +SET search_path TO public_data_v1_0_0; +ALTER TABLE public_data_v1_0_0.general RENAME TO general_to_be_removed; +CREATE TABLE public_data_v1_0_0.general + (LIKE public_data_v1_0_0.general_to_be_removed) + PARTITION BY list(audit_year); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2016; + CREATE TABLE public_data_v1_0_0.part_general_2016 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2016'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2017; + CREATE TABLE public_data_v1_0_0.part_general_2017 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2017'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2018; + CREATE TABLE public_data_v1_0_0.part_general_2018 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2018'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2019; + CREATE TABLE public_data_v1_0_0.part_general_2019 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2019'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2020; + CREATE TABLE public_data_v1_0_0.part_general_2020 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2020'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2021; + CREATE TABLE public_data_v1_0_0.part_general_2021 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2021'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2022; + CREATE TABLE public_data_v1_0_0.part_general_2022 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2022'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2023; + CREATE TABLE public_data_v1_0_0.part_general_2023 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2023'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2024; + CREATE TABLE public_data_v1_0_0.part_general_2024 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2024'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2025; + CREATE TABLE public_data_v1_0_0.part_general_2025 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2025'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2026; + CREATE TABLE public_data_v1_0_0.part_general_2026 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2026'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2027; + CREATE TABLE public_data_v1_0_0.part_general_2027 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2027'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2028; + CREATE TABLE public_data_v1_0_0.part_general_2028 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2028'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_general_2029; + CREATE TABLE public_data_v1_0_0.part_general_2029 + PARTITION OF public_data_v1_0_0.general + FOR VALUES IN ('2029'); + + + INSERT INTO public_data_v1_0_0.general + SELECT * FROM public_data_v1_0_0.general_to_be_removed; + +DROP TABLE public_data_v1_0_0.general_to_be_removed; + +------------------------------------------------------ +-- federal_awards - partitions (audit_year) +------------------------------------------------------ +-- python create_partition_statements.py federal_awards + +SET search_path TO public_data_v1_0_0; +ALTER TABLE public_data_v1_0_0.federal_awards RENAME TO federal_awards_to_be_removed; 
+CREATE TABLE public_data_v1_0_0.federal_awards + (LIKE public_data_v1_0_0.federal_awards_to_be_removed) + PARTITION BY list(audit_year); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2016; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2016 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2016'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2017; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2017 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2017'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2018; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2018 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2018'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2019; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2019 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2019'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2020; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2020 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2020'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2021; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2021 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2021'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2022; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2022 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2022'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2023; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2023 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2023'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2024; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2024 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2024'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2025; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2025 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2025'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2026; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2026 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2026'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2027; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2027 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2027'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2028; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2028 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2028'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_federal_awards_2029; + CREATE TABLE public_data_v1_0_0.part_federal_awards_2029 + PARTITION OF public_data_v1_0_0.federal_awards + FOR VALUES IN ('2029'); + + + INSERT INTO public_data_v1_0_0.federal_awards + SELECT * FROM public_data_v1_0_0.federal_awards_to_be_removed; + +DROP TABLE public_data_v1_0_0.federal_awards_to_be_removed; + +------------------------------------------------------ +-- combined - partitions (audit_year) +------------------------------------------------------ +-- python create_partition_statements.py combined + +SET search_path TO public_data_v1_0_0; +ALTER TABLE public_data_v1_0_0.combined RENAME TO combined_to_be_removed; +CREATE TABLE 
public_data_v1_0_0.combined + (LIKE public_data_v1_0_0.combined_to_be_removed) + PARTITION BY list(audit_year); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2016; + CREATE TABLE public_data_v1_0_0.part_combined_2016 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2016'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2017; + CREATE TABLE public_data_v1_0_0.part_combined_2017 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2017'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2018; + CREATE TABLE public_data_v1_0_0.part_combined_2018 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2018'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2019; + CREATE TABLE public_data_v1_0_0.part_combined_2019 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2019'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2020; + CREATE TABLE public_data_v1_0_0.part_combined_2020 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2020'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2021; + CREATE TABLE public_data_v1_0_0.part_combined_2021 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2021'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2022; + CREATE TABLE public_data_v1_0_0.part_combined_2022 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2022'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2023; + CREATE TABLE public_data_v1_0_0.part_combined_2023 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2023'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2024; + CREATE TABLE public_data_v1_0_0.part_combined_2024 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2024'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2025; + CREATE TABLE public_data_v1_0_0.part_combined_2025 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2025'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2026; + CREATE TABLE public_data_v1_0_0.part_combined_2026 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2026'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2027; + CREATE TABLE public_data_v1_0_0.part_combined_2027 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2027'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2028; + CREATE TABLE public_data_v1_0_0.part_combined_2028 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2028'); + + + DROP TABLE IF EXISTS public_data_v1_0_0.part_combined_2029; + CREATE TABLE public_data_v1_0_0.part_combined_2029 + PARTITION OF public_data_v1_0_0.combined + FOR VALUES IN ('2029'); + + + INSERT INTO public_data_v1_0_0.combined + SELECT * FROM public_data_v1_0_0.combined_to_be_removed; + +DROP TABLE public_data_v1_0_0.combined_to_be_removed; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/002_define_batch_size.sql b/backend/dissemination/sql/fac-snapshot-db/post/002_define_batch_size.sql new file mode 100644 index 0000000000..d6d5aee111 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/002_define_batch_size.sql @@ -0,0 +1,16 @@ +-- The batch_size is used to determine how many rows of data +-- are available in a single download. The PostgREST limit is +-- (as of 20241024) set to 20K rows. Howwever, we could make our +-- batches smaller. (This doesn't make things better, BTW.) So, +-- we set our batch size to the same size as the PgREST limit. 
+-- As a result, public tables are created with a batch number column, +-- and that batch number is incremented as DIV(n, batch_size()). +-- +-- This is defined as a function because there is no good way to define +-- a constant in Postgres/SQL. +-- https://stackoverflow.com/questions/13316773/is-there-a-way-to-define-a-named-constant-in-a-postgresql-query + +CREATE OR REPLACE FUNCTION public.batch_size() + RETURNS INT + LANGUAGE sql IMMUTABLE PARALLEL SAFE AS +'SELECT 20000'; diff --git a/backend/dissemination/api/api_v1_0_3/create_views.sql b/backend/dissemination/sql/fac-snapshot-db/post/010_api_v1_0_3.SKIP similarity index 69% rename from backend/dissemination/api/api_v1_0_3/create_views.sql rename to backend/dissemination/sql/fac-snapshot-db/post/010_api_v1_0_3.SKIP index cbaf47d169..da5175afd6 100644 --- a/backend/dissemination/api/api_v1_0_3/create_views.sql +++ b/backend/dissemination/sql/fac-snapshot-db/post/010_api_v1_0_3.SKIP @@ -1,5 +1,88 @@ -begin; +-- 20241024 +-- The 1.0.3 API just plain returns wrong data at this point. +-- Although it may break some clients, we are going to retire it rapidly. +-- Version 1.1.0 has all the same fields (plus 1-2 more), and it correctly +-- publishes all data, as opposed to suppressing all Tribal/Tribal org data. +------------------------------------------------------------------ +-- GATE +------------------------------------------------------------------ +-- We only want the API to run if certain conditions are met. +-- We could try and encode that in the `bash` portion of the code. +-- Or, we could just gate things at the top of our SQL. +-- If the conditions are not met, we should exit noisily. +-- A cast to regclass will fail with an exception if the table +-- does not exist. +DO LANGUAGE plpgsql +$GATE$ + DECLARE + the_schema varchar := 'public'; + the_table varchar := 'dissemination_general'; + api_ver varchar := 'API_v1_0_3'; + BEGIN + IF EXISTS ( + SELECT FROM pg_tables + WHERE schemaname = the_schema + AND tablename = the_table + ) + THEN + RAISE info '% Gate condition met. 
Continuing.', api_ver; + ELSE + RAISE exception '% %.% not found.', api_ver, the_schema, the_table; + END IF; + END +$GATE$; + +SELECT 'public.dissemination_general'::regclass; + +DO +$APIV103$ + BEGIN + DROP SCHEMA IF EXISTS api_v1_0_3 CASCADE; + DROP SCHEMA IF EXISTS api_v1_0_3_functions CASCADE; + + IF NOT EXISTS (select schema_name + from information_schema.schemata + where schema_name = 'api_v1_0_3') then + CREATE SCHEMA api_v1_0_3; + CREATE SCHEMA api_v1_0_3_functions; + + GRANT USAGE ON SCHEMA api_v1_0_3_functions to api_fac_gov; + + -- Grant access to tables and views + alter default privileges + in schema api_v1_0_3 + grant select + -- this includes views + on tables + to api_fac_gov; + + -- Grant access to sequences, if we have them + grant usage on schema api_v1_0_3 to api_fac_gov; + grant select, usage on all sequences in schema api_v1_0_3 to api_fac_gov; + alter default privileges + in schema api_v1_0_3 + grant select, usage + on sequences + to api_fac_gov; + end if; + END +$APIV103$ +; + +------------------------------------------------------------------ +-- functions +------------------------------------------------------------------ +create or replace function api_v1_0_3_functions.has_tribal_data_access() returns boolean +as $has_tribal_data_access$ +BEGIN + RETURN 0::BOOLEAN; +END; +$has_tribal_data_access$ LANGUAGE plpgsql; + +------------------------------------------------------------------ +-- VIEWs +------------------------------------------------------------------ --------------------------------------- -- finding_text --------------------------------------- @@ -12,8 +95,8 @@ create view api_v1_0_3.findings_text as ft.contains_chart_or_table, ft.finding_text from - dissemination_findingtext ft, - dissemination_general gen + public.dissemination_findingtext ft, + public.dissemination_general gen where ft.report_id = gen.report_id and @@ -34,8 +117,8 @@ create view api_v1_0_3.additional_ueis as --- uei.additional_uei from - dissemination_general gen, - dissemination_additionaluei uei + public.dissemination_general gen, + public.dissemination_additionaluei uei where gen.report_id = uei.report_id and @@ -65,8 +148,8 @@ create view api_v1_0_3.findings as finding.is_significant_deficiency, finding.type_requirement from - dissemination_finding finding, - dissemination_general gen + public.dissemination_finding finding, + public.dissemination_general gen where finding.report_id = gen.report_id and @@ -105,8 +188,8 @@ create view api_v1_0_3.federal_awards as award.is_passthrough_award, award.passthrough_amount from - dissemination_federalaward award, - dissemination_general gen + public.dissemination_federalaward award, + public.dissemination_general gen where award.report_id = gen.report_id and @@ -130,8 +213,8 @@ create view api_v1_0_3.corrective_action_plans as ct.contains_chart_or_table, ct.planned_action from - dissemination_CAPText ct, - dissemination_General gen + public.dissemination_CAPText ct, + public.dissemination_General gen where ct.report_id = gen.report_id and @@ -157,8 +240,8 @@ create view api_v1_0_3.notes_to_sefa as note.content, note.contains_chart_or_table from - dissemination_general gen, - dissemination_note note + public.dissemination_general gen, + public.dissemination_note note where note.report_id = gen.report_id and @@ -181,8 +264,8 @@ create view api_v1_0_3.passthrough as pass.passthrough_id, pass.passthrough_name from - dissemination_general as gen, - dissemination_passthrough as pass + public.dissemination_general as gen, + 
public.dissemination_passthrough as pass where gen.report_id = pass.report_id and @@ -270,7 +353,7 @@ create view api_v1_0_3.general as ELSE 'Yes' END AS is_secondary_auditors from - dissemination_general gen + public.dissemination_general gen where gen.is_public = true or @@ -298,8 +381,8 @@ create view api_v1_0_3.secondary_auditors as sa.address_state, sa.address_zipcode from - dissemination_General gen, - dissemination_SecondaryAuditor sa + public.dissemination_General gen, + public.dissemination_SecondaryAuditor sa where sa.report_id = gen.report_id and @@ -317,8 +400,8 @@ create view api_v1_0_3.additional_eins as --- ein.additional_ein from - dissemination_general gen, - dissemination_additionalein ein + public.dissemination_general gen, + public.dissemination_additionalein ein where gen.report_id = ein.report_id and @@ -327,9 +410,3 @@ create view api_v1_0_3.additional_eins as (gen.is_public = false and api_v1_0_3_functions.has_tribal_data_access())) order by ein.id ; - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/sql/fac-snapshot-db/post/010_copy_dissem_tables.sql b/backend/dissemination/sql/fac-snapshot-db/post/010_copy_dissem_tables.sql new file mode 100644 index 0000000000..50d65999df --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/010_copy_dissem_tables.sql @@ -0,0 +1,208 @@ +--------------------------------------------- +-- COPY DISSEMINATION TABLES +-- WHY? Because our deploy process will want to make backups, +-- we will want to DROP and recreate those tables. If we +-- point our API at those tables (even in fac-snapshot-db), we +-- will have a problem at deploy-time. +-- +-- So, the fix is to take the copy that is in fac-snapshot-db (the +-- actual snapshot of the prod data) and make *one more copy*. We will +-- then point the API at that. We do this becasue we can (nightly) drop +-- the copy, tear down the API, make a fresh backup (and copy) and then +-- stand the API back up. +-- +-- It is a long way to go, but it kinda makes our entire data pipeline +-- "stateless," in a way. 
+ +-- CREATE TABLE [Table to copy To] +-- AS [Table to copy From] + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_additionalein() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_additionalein + AS SELECT * FROM public.dissemination_additionalein; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_additionaluei() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_additionaluei + AS SELECT * FROM public.dissemination_additionaluei; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_captext() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_captext + AS SELECT * FROM public.dissemination_captext; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_federalaward() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_federalaward + AS SELECT * FROM public.dissemination_federalaward; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_finding() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_finding + AS SELECT * FROM public.dissemination_finding; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_findingtext() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_findingtext + AS SELECT * FROM public.dissemination_findingtext; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_general() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_general + AS SELECT * FROM public.dissemination_general; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_invalidauditrecord() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_invalidauditrecord + AS SELECT * FROM public.dissemination_invalidauditrecord; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_migrationinspectionrecord() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_migrationinspectionrecord + AS SELECT * FROM public.dissemination_migrationinspectionrecord; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_note() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_note + AS SELECT * FROM public.dissemination_note; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_passthrough() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_passthrough + AS SELECT * FROM public.dissemination_passthrough; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_secondaryauditor() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_secondaryauditor + AS SELECT * FROM public.dissemination_secondaryauditor; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_tribalapiaccesskeyids() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE dissem_copy.dissemination_tribalapiaccesskeyids + AS SELECT * FROM public.dissemination_tribalapiaccesskeyids; + END + $ct$ + LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION dissem_copy.create_dissemination_onetimeaccess() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE 
dissem_copy.dissemination_onetimeaccess + AS SELECT * FROM public.dissemination_onetimeaccess; + END + $ct$ + LANGUAGE plpgsql; + + +DO LANGUAGE plpgsql +$go$ + BEGIN + RAISE info 'create_dissemination_additionalein'; + PERFORM dissem_copy.create_dissemination_additionalein(); + RAISE info 'create_dissemination_additionaluei'; + PERFORM dissem_copy.create_dissemination_additionaluei(); + RAISE info 'create_dissemination_captext'; + PERFORM dissem_copy.create_dissemination_captext(); + RAISE info 'create_dissemination_federalaward'; + PERFORM dissem_copy.create_dissemination_federalaward(); + RAISE info 'create_dissemination_finding'; + PERFORM dissem_copy.create_dissemination_finding(); + RAISE info 'create_dissemination_findingtext'; + PERFORM dissem_copy.create_dissemination_findingtext(); + RAISE info 'create_dissemination_general'; + PERFORM dissem_copy.create_dissemination_general(); + RAISE info 'create_dissemination_invalidauditrecord'; + PERFORM dissem_copy.create_dissemination_invalidauditrecord(); + RAISE info 'create_dissemination_migrationinspectionrecord'; + PERFORM dissem_copy.create_dissemination_migrationinspectionrecord(); + RAISE info 'create_dissemination_note'; + PERFORM dissem_copy.create_dissemination_note(); + RAISE info 'create_dissemination_passthrough'; + PERFORM dissem_copy.create_dissemination_passthrough(); + RAISE info 'create_dissemination_secondaryauditor'; + PERFORM dissem_copy.create_dissemination_secondaryauditor(); + RAISE INFO 'dissemination_tribalapiaccesskeyids'; + PERFORM dissem_copy.create_dissemination_tribalapiaccesskeyids(); + RAISE info 'create_dissemination_onetimeaccess'; + PERFORM dissem_copy.create_dissemination_onetimeaccess(); + END +$go$; + diff --git a/backend/dissemination/sql/fac-snapshot-db/post/020_public_data_v1_0_0_tables.sql b/backend/dissemination/sql/fac-snapshot-db/post/020_public_data_v1_0_0_tables.sql new file mode 100644 index 0000000000..061873884f --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/020_public_data_v1_0_0_tables.sql @@ -0,0 +1,736 @@ +----------------------------------------------------------- +-- general +----------------------------------------------------------- +-- We do general first because all other tables are built off of it. 
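+-- Note: every create_* function below follows the same pattern: CREATE TABLE ... AS SELECT
+-- out of dissem_copy, with a NEXTVAL() over a per-table sequence as `seq`, followed by an
+-- ALTER/UPDATE that adds batch_number = DIV(seq, public.batch_size()). The tables that can
+-- carry suppressed data (corrective_action_plans, findings_text, notes_to_sefa) additionally
+-- filter on gen.is_public = true.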
+CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_general() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.general AS + SELECT + gen.id as id, + NEXTVAL('public_data_v1_0_0.seq_general') AS seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.agencies_with_prior_findings, + gen.audit_period_covered, + gen.audit_type, + gen.auditee_address_line_1, + gen.auditee_certified_date, + gen.auditee_certify_name, + gen.auditee_certify_title, + gen.auditee_city, + gen.auditee_contact_name, + gen.auditee_contact_title, + gen.auditee_ein, + gen.auditee_email, + gen.auditee_name, + gen.auditee_phone, + gen.auditee_state, + gen.auditee_zip, + gen.auditor_address_line_1, + gen.auditor_certified_date, + gen.auditor_certify_name, + gen.auditor_certify_title, + gen.auditor_city, + gen.auditor_contact_name, + gen.auditor_contact_title, + gen.auditor_country, + gen.auditor_ein, + gen.auditor_email, + gen.auditor_firm_name, + gen.auditor_foreign_address, + gen.auditor_phone, + gen.auditor_state, + gen.auditor_zip, + gen.cognizant_agency, + gen.data_source, + gen.date_created, + gen.dollar_threshold, + gen.entity_type, + gen.fac_accepted_date, + gen.fy_end_date, + gen.fy_start_date, + gen.gaap_results, + gen.is_additional_ueis, + gen.is_aicpa_audit_guide_included, + gen.is_going_concern_included, + gen.is_internal_control_deficiency_disclosed, + gen.is_internal_control_material_weakness_disclosed, + gen.is_low_risk_auditee, + gen.is_material_noncompliance_disclosed, + CASE EXISTS + ( + SELECT + ein.report_id + FROM + dissemination_additionalein ein + WHERE + ein.report_id = gen.report_id + ) + WHEN FALSE THEN 'No' + ELSE 'Yes' + END AS is_multiple_eins, + gen.is_public, + CASE EXISTS + ( + SELECT + aud.report_id + FROM + dissemination_secondaryauditor aud + WHERE + aud.report_id = gen.report_id + ) + WHEN FALSE THEN 'No' + ELSE 'Yes' + END AS is_secondary_auditors, + gen.is_sp_framework_required, + gen.number_months, + gen.oversight_agency, + gen.ready_for_certification_date, + gen.sp_framework_basis, + gen.sp_framework_opinions, + gen.submitted_date, + gen.total_amount_expended, + gen.type_audit_code + FROM + dissem_copy.dissemination_general gen + ORDER BY gen.id; + + -- Add a clean batch number after the table is created. 
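+    -- With batch_size() = 20000 (defined in 002_define_batch_size.sql), the row with
+    -- seq = 45000 gets batch_number = 2; bulk downloads can then page one batch at a time.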
+ ALTER TABLE public_data_v1_0_0.general + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.general SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + + +----------------------------------------------------------- +-- addition_eins +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_additional_eins() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.additional_eins AS + SELECT + ein.id as id, + NEXTVAL('public_data_v1_0_0.seq_additional_eins') AS seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + ein.additional_ein + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_additionalein ein + WHERE + gen.report_id = ein.report_id + ORDER BY ein.id; + + ALTER TABLE public_data_v1_0_0.additional_eins + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.additional_eins SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + + +----------------------------------------------------------- +-- additional_ueis +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_additional_ueis() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.additional_ueis AS + SELECT + uei.id as id, + NEXTVAL('public_data_v1_0_0.seq_additional_ueis') AS seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + uei.additional_uei + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_additionaluei uei + WHERE + gen.report_id = uei.report_id + ORDER BY uei.id; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.additional_ueis + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.additional_ueis SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- corrective_action_plans +----------------------------------------------------------- + +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_corrective_action_plans() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.corrective_action_plans AS + SELECT + cap.id as id, + NEXTVAL('public_data_v1_0_0.seq_corrective_action_plans') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + cap.contains_chart_or_table, + cap.finding_ref_number, + cap.planned_action + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_captext cap + WHERE + cap.report_id = gen.report_id + AND + -- Only include the public corrective action plans. + gen.is_public = true + ORDER BY cap.id; + + -- Add a clean batch number after the table is created. 
+ ALTER TABLE public_data_v1_0_0.corrective_action_plans + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.corrective_action_plans SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- federal_awards +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_federal_awards() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.federal_awards AS + SELECT + award.id as id, + NEXTVAL('public_data_v1_0_0.seq_federal_awards') as seq, + dg.report_id, + dg.auditee_uei, + dg.audit_year, + dg.fac_accepted_date, + concat(award.federal_agency_prefix,'.',award.federal_award_extension) as aln, + --- + award.additional_award_identification, + award.amount_expended, + award.audit_report_type, + award.award_reference, + award.cluster_name, + award.cluster_total, + award.federal_agency_prefix, + award.federal_award_extension, + award.federal_program_name, + award.federal_program_total, + award.findings_count, + award.is_direct, + award.is_loan, + award.is_major, + award.is_passthrough_award, + award.loan_balance, + award.other_cluster_name, + award.passthrough_amount, + award.state_cluster_name + FROM + public_data_v1_0_0.general dg, + dissem_copy.dissemination_federalaward award + WHERE + award.report_id = dg.report_id + ORDER BY award.id; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.federal_awards + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.federal_awards SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- findings +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_findings() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.findings AS + SELECT + finding.id as id, + NEXTVAL('public_data_v1_0_0.seq_findings') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + finding.award_reference, + finding.is_material_weakness, + finding.is_modified_opinion, + finding.is_other_findings, + finding.is_other_matters, + finding.is_questioned_costs, + finding.is_repeat_finding, + finding.is_significant_deficiency, + finding.prior_finding_ref_numbers, + finding.reference_number, + finding.type_requirement + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_finding finding + WHERE + finding.report_id = gen.report_id + ORDER BY finding.id; + + -- Add a clean batch number after the table is created. 
+ ALTER TABLE public_data_v1_0_0.findings + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.findings SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- findings_text +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_findings_text() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.findings_text AS + SELECT + ft.id as id, + NEXTVAL('public_data_v1_0_0.seq_findings_text') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + ft.finding_ref_number, + ft.contains_chart_or_table, + ft.finding_text + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_findingtext ft + WHERE + ft.report_id = gen.report_id + AND + gen.is_public = true + ORDER BY ft.id; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.findings_text + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.findings_text SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- notes_to_sefa +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_notes_to_sefa() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.notes_to_sefa AS + SELECT + note.id as id, + NEXTVAL('public_data_v1_0_0.seq_notes_to_sefa') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + note.accounting_policies, + note.contains_chart_or_table, + note.content, + note.is_minimis_rate_used, + note.note_title as title, + note.rate_explained + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_note note + WHERE + note.report_id = gen.report_id + AND + -- Some notes are not public. + gen.is_public = true + ORDER BY note.id; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.notes_to_sefa + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.notes_to_sefa SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- passthrough +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_passthrough() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.passthrough AS + SELECT + pass.id as id, + NEXTVAL('public_data_v1_0_0.seq_passthrough') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + pass.award_reference, + pass.passthrough_id, + pass.passthrough_name + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_passthrough pass + WHERE + gen.report_id = pass.report_id + ORDER BY pass.id; + + -- Add a clean batch number after the table is created. 
+ ALTER TABLE public_data_v1_0_0.passthrough + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.passthrough SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- secondary_auditors +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_secondary_auditors() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.secondary_auditors AS + SELECT + sa.id as id, + NEXTVAL('public_data_v1_0_0.seq_secondary_auditors') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + sa.address_city, + sa.address_state, + sa.address_street, + sa.address_zipcode, + sa.auditor_ein, + sa.auditor_name, + sa.contact_email, + sa.contact_name, + sa.contact_phone, + sa.contact_title + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_secondaryauditor sa + WHERE + sa.report_id = gen.report_id + ORDER BY sa.id; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.secondary_auditors + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.secondary_auditors SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- combined +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_combined() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.combined AS + SELECT + dg.report_id, + NEXTVAL('public_data_v1_0_0.seq_combined') as seq, + dfa.award_reference, + df.reference_number, + concat(dfa.federal_agency_prefix,'.',dfa.federal_award_extension) as aln, + -- + -- general + -- + dg.id as general_row_id, + dg.auditee_uei, + dg.audit_year, + dg.agencies_with_prior_findings, + dg.audit_period_covered, + dg.audit_type, + dg.auditee_address_line_1, + dg.auditee_certified_date, + dg.auditee_certify_name, + dg.auditee_certify_title, + dg.auditee_city, + dg.auditee_contact_name, + dg.auditee_contact_title, + dg.auditee_ein, + dg.auditee_email, + dg.auditee_name, + dg.auditee_phone, + dg.auditee_state, + dg.auditee_zip, + dg.auditor_address_line_1, + dg.auditor_certified_date, + dg.auditor_certify_name, + dg.auditor_certify_title, + dg.auditor_city, + dg.auditor_contact_name, + dg.auditor_contact_title, + dg.auditor_country, + dg.auditor_ein, + dg.auditor_email, + dg.auditor_firm_name, + dg.auditor_foreign_address, + dg.auditor_phone, + dg.auditor_state, + dg.auditor_zip, + dg.cognizant_agency, + dg.data_source, + dg.date_created, + dg.dollar_threshold, + dg.entity_type, + dg.fac_accepted_date, + dg.fy_end_date, + dg.fy_start_date, + dg.gaap_results, + dg.is_additional_ueis, + dg.is_aicpa_audit_guide_included, + dg.is_going_concern_included, + dg.is_internal_control_deficiency_disclosed, + dg.is_internal_control_material_weakness_disclosed, + dg.is_low_risk_auditee, + dg.is_material_noncompliance_disclosed, + dg.is_multiple_eins, + dg.is_public, + dg.is_secondary_auditors, + dg.is_sp_framework_required, + dg.number_months, + dg.oversight_agency, + dg.ready_for_certification_date, + dg.sp_framework_basis, + dg.sp_framework_opinions, + dg.submitted_date, + dg.total_amount_expended, + dg.type_audit_code, + -- + -- federal_award + -- + dfa.id as federal_award_row_id, + dfa.additional_award_identification, + dfa.amount_expended, + dfa.audit_report_type, + dfa.cluster_name, + 
dfa.cluster_total, + dfa.federal_agency_prefix, + dfa.federal_award_extension, + dfa.federal_program_name, + dfa.federal_program_total, + dfa.findings_count, + dfa.is_direct, + dfa.is_loan, + dfa.is_major, + dfa.is_passthrough_award, + dfa.loan_balance, + dfa.other_cluster_name, + dfa.passthrough_amount, + dfa.state_cluster_name, + -- + -- finding + -- + df.id as finding_row_id, + df.is_material_weakness, + df.is_modified_opinion, + df.is_other_findings, + df.is_other_matters, + df.is_questioned_costs, + df.is_repeat_finding, + df.is_significant_deficiency, + df.prior_finding_ref_numbers, + df.type_requirement, + -- + -- passthrough + -- + dp.id as passthrough_row_id, + dp.passthrough_id, + dp.passthrough_name + FROM + dissem_copy.dissemination_federalaward dfa + LEFT JOIN public_data_v1_0_0.general dg + ON dfa.report_id = dg.report_id + LEFT JOIN dissem_copy.dissemination_finding df + ON dfa.report_id = df.report_id + AND dfa.award_reference = df.award_reference + LEFT JOIN dissem_copy.dissemination_passthrough dp + ON dfa.report_id = dp.report_id + AND dfa.award_reference = dp.award_reference + ORDER BY seq + ; + + -- For advanced search, Django wants an `id` column. + ALTER TABLE public_data_v1_0_0.combined + ADD COLUMN id INTEGER; + UPDATE public_data_v1_0_0.combined SET id=seq; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.combined + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.combined SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- migration_inspection_record +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_migration_inspection_record() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.migration_inspection_record AS + SELECT + mir.id AS id, + NEXTVAL('public_data_v1_0_0.seq_migration_inspection_record') as seq, + mir.audit_year, + mir.dbkey, + mir.report_id, + mir.run_datetime, + mir.additional_ein, + mir.additional_uei, + mir.cap_text, + mir.federal_award, + mir.finding, + mir.finding_text, + mir.general, + mir.note, + mir.passthrough, + mir.secondary_auditor + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_migrationinspectionrecord mir + WHERE + mir.report_id = gen.report_id + AND + gen.is_public = true + ; + + -- Add a clean batch number after the table is created. 
+ ALTER TABLE public_data_v1_0_0.migration_inspection_record + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.migration_inspection_record SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- invalid_audit_record +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION public_data_v1_0_0_functions.create_invalid_audit_record() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE public_data_v1_0_0.invalid_audit_record AS + SELECT + iar.id AS id, + NEXTVAL('public_data_v1_0_0.seq_invalid_audit_record') as seq, + iar.audit_year, + iar.dbkey, + iar.report_id, + iar.run_datetime, + iar.additional_ein, + iar.additional_uei, + iar.cap_text, + iar.federal_award, + iar.finding, + iar.finding_text, + iar.general, + iar.note, + iar.passthrough, + iar.secondary_auditor + FROM + dissem_copy.dissemination_invalidauditrecord iar, + public_data_v1_0_0.general gen + WHERE + iar.report_id = gen.report_id + AND + gen.is_public = true + ; + + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.invalid_audit_record + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.invalid_audit_record SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- CONDITIONAL TABLE CREATION +-- We make this conditional at startup/on deploy. +-- The reason is that every time we deploy, this would tear down +-- the entire API, interrupting service. We only do that nightly, if we can. +-- However, on a clean deploy or a first deploy to a 2-DB config, we will +-- need this to run. +----------------------------------------------------------- +DO LANGUAGE plpgsql +$GO$ + BEGIN + RAISE info 'Creating general'; + PERFORM public_data_v1_0_0_functions.create_general(); + RAISE info 'Creating additional_eins'; + PERFORM public_data_v1_0_0_functions.create_additional_eins(); + RAISE info 'Creating additional_ueis'; + PERFORM public_data_v1_0_0_functions.create_additional_ueis(); + RAISE info 'Creating corrective_action_plans'; + PERFORM public_data_v1_0_0_functions.create_corrective_action_plans(); + RAISE info 'Creating federal_awards'; + PERFORM public_data_v1_0_0_functions.create_federal_awards(); + RAISE info 'Creating findings'; + PERFORM public_data_v1_0_0_functions.create_findings(); + RAISE info 'Creating findings_text'; + PERFORM public_data_v1_0_0_functions.create_findings_text(); + RAISE info 'Creating notes_to_sefa'; + PERFORM public_data_v1_0_0_functions.create_notes_to_sefa(); + RAISE info 'Creating passthrough'; + PERFORM public_data_v1_0_0_functions.create_passthrough(); + RAISE info 'Creating secondary_auditors'; + PERFORM public_data_v1_0_0_functions.create_secondary_auditors(); + RAISE info 'Creating combined'; + PERFORM public_data_v1_0_0_functions.create_combined(); + RAISE info 'Creating migration_inspection_record'; + PERFORM public_data_v1_0_0_functions.create_migration_inspection_record(); + RAISE info 'Create invalid_audit_record'; + PERFORM public_data_v1_0_0_functions.create_invalid_audit_record(); + END +$GO$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/021_suppressed_data_v1_0_0_tables.sql b/backend/dissemination/sql/fac-snapshot-db/post/021_suppressed_data_v1_0_0_tables.sql new file mode 100644 index 0000000000..dfea791ee1 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/021_suppressed_data_v1_0_0_tables.sql 
@@ -0,0 +1,205 @@ +----------------------------------------------------------- +-- corrective_action_plans +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION suppressed_data_v1_0_0.create_corrective_action_plans() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE suppressed_data_v1_0_0.corrective_action_plans AS + SELECT + cap.id as id, + NEXTVAL('suppressed_data_v1_0_0.seq_corrective_action_plans') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + cap.contains_chart_or_table, + cap.finding_ref_number, + cap.planned_action + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_captext cap + WHERE + cap.report_id = gen.report_id + AND + -- Only include the suppressed corrective action plans. + gen.is_public = false + ORDER BY cap.id; + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- findings_text +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION suppressed_data_v1_0_0.create_findings_text() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE suppressed_data_v1_0_0.findings_text AS + SELECT + ft.id as id, + NEXTVAL('suppressed_data_v1_0_0.seq_findings_text') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + ft.finding_ref_number, + ft.contains_chart_or_table, + ft.finding_text + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_findingtext ft + WHERE + ft.report_id = gen.report_id + AND + -- Findings text is not always public + gen.is_public = false + ORDER BY ft.id; + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- notes_to_sefa +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION suppressed_data_v1_0_0.create_notes_to_sefa() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE suppressed_data_v1_0_0.notes_to_sefa AS + SELECT + note.id as id, + NEXTVAL('suppressed_data_v1_0_0.seq_notes_to_sefa') as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + note.accounting_policies, + note.contains_chart_or_table, + note.content, + note.is_minimis_rate_used, + note.note_title as title, + note.rate_explained + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_note note + WHERE + note.report_id = gen.report_id + AND + -- Some notes are not public. + gen.is_public = false + ORDER BY note.id; + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- migration_inspection_record +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION suppressed_data_v1_0_0.create_migration_inspection_record() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE suppressed_data_v1_0_0.migration_inspection_record AS + SELECT + mir.id AS id, + NEXTVAL('suppressed_data_v1_0_0.seq_migration_inspection_record') as seq, + mir.audit_year, + mir.dbkey, + mir.report_id, + mir.run_datetime, + mir.additional_ein, + mir.additional_uei, + mir.cap_text, + mir.federal_award, + mir.finding, + mir.finding_text, + mir.general, + mir.note, + mir.passthrough, + mir.secondary_auditor + FROM + public_data_v1_0_0.general gen, + dissem_copy.dissemination_migrationinspectionrecord mir + WHERE + mir.report_id = gen.report_id + AND + gen.is_public = false + ; + + -- Add a clean batch number after the table is created. 
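      -- (Illustrative check, assuming every dissemination_migrationinspectionrecord
      --  row has a matching general row: the public and suppressed copies should
      --  partition the source table, so the following should return true:
      --    SELECT (SELECT COUNT(*) FROM public_data_v1_0_0.migration_inspection_record)
      --         + (SELECT COUNT(*) FROM suppressed_data_v1_0_0.migration_inspection_record)
      --         = (SELECT COUNT(*) FROM dissem_copy.dissemination_migrationinspectionrecord)
      --           AS counts_match;
      --  )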
+ ALTER TABLE suppressed_data_v1_0_0.migration_inspection_record + ADD COLUMN batch_number INTEGER; + UPDATE suppressed_data_v1_0_0.migration_inspection_record SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- invalid_audit_record +----------------------------------------------------------- +CREATE OR REPLACE FUNCTION suppressed_data_v1_0_0.create_invalid_audit_record() + RETURNS VOID + AS + $ct$ + BEGIN + CREATE TABLE suppressed_data_v1_0_0.invalid_audit_record AS + SELECT + iar.id AS id, + NEXTVAL('suppressed_data_v1_0_0.seq_invalid_audit_record') as seq, + iar.audit_year, + iar.dbkey, + iar.report_id, + iar.run_datetime, + iar.additional_ein, + iar.additional_uei, + iar.cap_text, + iar.federal_award, + iar.finding, + iar.finding_text, + iar.general, + iar.note, + iar.passthrough, + iar.secondary_auditor + FROM + dissem_copy.dissemination_invalidauditrecord iar, + dissem_copy.dissemination_general gen + WHERE + iar.report_id = gen.report_id + AND + gen.is_public = true + ; + + -- Add a clean batch number after the table is created. + ALTER TABLE suppressed_data_v1_0_0.invalid_audit_record + ADD COLUMN batch_number INTEGER; + UPDATE suppressed_data_v1_0_0.invalid_audit_record SET batch_number=DIV(seq, public.batch_size()); + END + $ct$ + LANGUAGE plpgsql; + +----------------------------------------------------------- +-- CONDITIONAL TABLE CREATION +----------------------------------------------------------- +DO LANGUAGE plpgsql +$GO$ + BEGIN + RAISE info 'Creating suppressed corrective_action_plans'; + PERFORM suppressed_data_v1_0_0.create_corrective_action_plans(); + RAISE info 'Creating suppressed findings_text'; + PERFORM suppressed_data_v1_0_0.create_findings_text(); + RAISE info 'Creating suppressed notes_to_sefa'; + PERFORM suppressed_data_v1_0_0.create_notes_to_sefa(); + RAISE info 'Creating migration_inspection_record'; + PERFORM suppressed_data_v1_0_0.create_migration_inspection_record(); + RAISE info 'Create invalid_audit_record'; + PERFORM suppressed_data_v1_0_0.create_invalid_audit_record(); + END +$GO$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/030_public_data_v1_0_0_metadata.sql b/backend/dissemination/sql/fac-snapshot-db/post/030_public_data_v1_0_0_metadata.sql new file mode 100644 index 0000000000..d7b30d2c5a --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/030_public_data_v1_0_0_metadata.sql @@ -0,0 +1,79 @@ +CREATE OR REPLACE FUNCTION public_data_v1_0_0.create_metadata() + RETURNS VOID +AS +$ct$ +BEGIN + CREATE TABLE public_data_v1_0_0.metadata AS + SELECT 'additional_eins' + AS table, COUNT(*) + FROM public_data_v1_0_0.additional_eins + UNION + SELECT 'additional_ueis' + AS table, COUNT(*) + FROM public_data_v1_0_0.additional_ueis + UNION + SELECT 'combined' + AS table, COUNT(*) + FROM public_data_v1_0_0.combined + UNION + SELECT 'federal_awards' + AS table, COUNT(*) + FROM public_data_v1_0_0.federal_awards + UNION + SELECT 'findings_text' + AS table, COUNT(*) + FROM public_data_v1_0_0.findings_text + UNION + SELECT 'findings' + AS table, COUNT(*) + FROM public_data_v1_0_0.findings + UNION + SELECT 'general' + AS table, COUNT(*) + FROM public_data_v1_0_0.general + UNION + SELECT 'notes_to_sefa' + AS table, COUNT(*) + FROM public_data_v1_0_0.notes_to_sefa + UNION + SELECT 'passthrough' + AS table, COUNT(*) + FROM public_data_v1_0_0.passthrough + UNION + SELECT 'secondary_auditors' + AS table, COUNT(*) + FROM 
public_data_v1_0_0.secondary_auditors + UNION + SELECT 'suppressed_corrective_action_plans' + AS table, COUNT(*) + FROM suppressed_data_v1_0_0.corrective_action_plans + UNION + SELECT 'suppressed_findings_text' + AS table, COUNT(*) + FROM suppressed_data_v1_0_0.findings_text + UNION + SELECT 'suppressed_notes_to_sefa' + AS table, COUNT(*) + FROM suppressed_data_v1_0_0.notes_to_sefa + UNION + SELECT 'public_submission_count' + AS table, COUNT(*) + FROM public_data_v1_0_0.general gen + WHERE gen.is_public = true + UNION + SELECT 'suppressed_submission_count' + AS table, COUNT(*) + FROM public_data_v1_0_0.general gen + WHERE gen.is_public = false + ; +END +$ct$ +LANGUAGE plpgsql; + +DO LANGUAGE plpgsql +$GO$ + BEGIN + RAISE info 'Creating metadata table'; + PERFORM public_data_v1_0_0.create_metadata(); + END +$GO$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/120_api_v1_1_0_gate.sql b/backend/dissemination/sql/fac-snapshot-db/post/120_api_v1_1_0_gate.sql new file mode 100644 index 0000000000..1ed472fb34 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/120_api_v1_1_0_gate.sql @@ -0,0 +1,28 @@ +------------------------------------------------------------------ +-- GATE +------------------------------------------------------------------ +-- We only want the API to run if certain conditions are met. +-- We could try and encode that in the `bash` portion of the code. +-- Or, we could just gate things at the top of our SQL. +-- If the conditions are not met, we should exit noisily. +-- A cast to regclass will fail with an exception if the table +-- does not exist. +DO LANGUAGE plpgsql +$GATE$ + DECLARE + the_schema varchar := 'dissem_copy'; + the_table varchar := 'dissemination_general'; + api_ver varchar := 'api_v1_1_0'; + BEGIN + IF EXISTS ( + SELECT FROM pg_tables + WHERE schemaname = the_schema + AND tablename = the_table + ) + THEN + RAISE info '% Gate condition met. Continuing.', api_ver; + ELSE + RAISE exception '% %.% not found.', api_ver, the_schema, the_table; + END IF; + END +$GATE$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/122_api_v1_1_0_functions.sql b/backend/dissemination/sql/fac-snapshot-db/post/122_api_v1_1_0_functions.sql new file mode 100644 index 0000000000..945edbb15e --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/122_api_v1_1_0_functions.sql @@ -0,0 +1,53 @@ +----------------------------------------------------- +-- get_header +-- Reaches into the headers provided through PostgREST in order to +-- grab a particular value, keyed by the header key (e.g. "x-api-key") +----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v1_1_0_functions.get_header(item text) RETURNS TEXT + AS $get_header$ + DECLARE res TEXT; + BEGIN + SELECT (current_setting('request.headers', true)::json)->>item INTO res; + RETURN res; + END; +$get_header$ LANGUAGE plpgsql; + +----------------------------------------------------- +-- get_api_key_uuid +-- Uses the get_header function to grab the user id provided by api.data.gov +----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v1_1_0_functions.get_api_key_uuid() RETURNS TEXT +AS $gaku$ +DECLARE uuid TEXT; +BEGIN + SELECT api_v1_1_0_functions.get_header('x-api-user-id') INTO uuid; + RETURN uuid; +end; +$gaku$ LANGUAGE plpgsql; + +----------------------------------------------------- +-- has_tribal_data_access +-- Determines whether the key id in question has been granted +-- tribal data access. 
Required for accessing all of the suppressed tables. +----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v1_1_0_functions.has_tribal_data_access() +RETURNS BOOLEAN +AS $has_tribal_data_access$ +DECLARE + uuid_header UUID; + key_exists BOOLEAN; +BEGIN + SELECT api_v1_1_0_functions.get_api_key_uuid() INTO uuid_header; + SELECT + CASE WHEN EXISTS ( + SELECT key_id + FROM dissem_copy.dissemination_tribalapiaccesskeyids taaki + WHERE taaki.key_id = uuid_header::TEXT) + THEN 1::BOOLEAN + ELSE 0::BOOLEAN + END + INTO key_exists; + -- RAISE INFO 'api_v1_1_0 has_tribal % %', uuid_header, key_exists; + RETURN key_exists; +END; +$has_tribal_data_access$ LANGUAGE plpgsql; diff --git a/backend/dissemination/api/api_v1_1_0/create_views.sql b/backend/dissemination/sql/fac-snapshot-db/post/123_api_v1_1_0_views.sql similarity index 88% rename from backend/dissemination/api/api_v1_1_0/create_views.sql rename to backend/dissemination/sql/fac-snapshot-db/post/123_api_v1_1_0_views.sql index f14f58d50e..1565041224 100644 --- a/backend/dissemination/api/api_v1_1_0/create_views.sql +++ b/backend/dissemination/sql/fac-snapshot-db/post/123_api_v1_1_0_views.sql @@ -1,25 +1,19 @@ -begin; - --------------------------------------- --- finding_text +-- additional_eins --------------------------------------- -create view api_v1_1_0.findings_text as +create view api_v1_1_0.additional_eins as select gen.report_id, gen.auditee_uei, gen.audit_year, - ft.finding_ref_number, - ft.contains_chart_or_table, - ft.finding_text + --- + ein.additional_ein from - dissemination_findingtext ft, - dissemination_general gen + dissem_copy.dissemination_general gen, + dissem_copy.dissemination_additionalein ein where - ft.report_id = gen.report_id - and - (gen.is_public = true - or (gen.is_public = false and api_v1_1_0_functions.has_tribal_data_access())) - order by ft.id + gen.report_id = ein.report_id + order by ein.id ; --------------------------------------- @@ -33,38 +27,35 @@ create view api_v1_1_0.additional_ueis as --- uei.additional_uei from - dissemination_general gen, - dissemination_additionaluei uei + dissem_copy.dissemination_general gen, + dissem_copy.dissemination_additionaluei uei where gen.report_id = uei.report_id order by uei.id ; + --------------------------------------- --- finding +-- corrective_action_plan --------------------------------------- -create view api_v1_1_0.findings as +create view api_v1_1_0.corrective_action_plans as select gen.report_id, gen.auditee_uei, gen.audit_year, - finding.award_reference, - finding.reference_number, - finding.is_material_weakness, - finding.is_modified_opinion, - finding.is_other_findings, - finding.is_other_matters, - finding.prior_finding_ref_numbers, - finding.is_questioned_costs, - finding.is_repeat_finding, - finding.is_significant_deficiency, - finding.type_requirement + --- + ct.finding_ref_number, + ct.contains_chart_or_table, + ct.planned_action from - dissemination_finding finding, - dissemination_general gen + dissem_copy.dissemination_CAPText ct, + dissem_copy.dissemination_General gen where - finding.report_id = gen.report_id - order by finding.id + ct.report_id = gen.report_id + and + (gen.is_public = true + or (gen.is_public = false and api_v1_1_0_functions.has_tribal_data_access())) + order by ct.id ; --------------------------------------- @@ -96,84 +87,62 @@ create view api_v1_1_0.federal_awards as award.is_passthrough_award, award.passthrough_amount from - dissemination_federalaward award, - dissemination_general gen + 
dissem_copy.dissemination_federalaward award, + dissem_copy.dissemination_general gen where award.report_id = gen.report_id order by award.id ; - --------------------------------------- --- corrective_action_plan +-- finding --------------------------------------- -create view api_v1_1_0.corrective_action_plans as +create view api_v1_1_0.findings as select gen.report_id, gen.auditee_uei, gen.audit_year, - --- - ct.finding_ref_number, - ct.contains_chart_or_table, - ct.planned_action + finding.award_reference, + finding.reference_number, + finding.is_material_weakness, + finding.is_modified_opinion, + finding.is_other_findings, + finding.is_other_matters, + finding.prior_finding_ref_numbers, + finding.is_questioned_costs, + finding.is_repeat_finding, + finding.is_significant_deficiency, + finding.type_requirement from - dissemination_CAPText ct, - dissemination_General gen + dissem_copy.dissemination_finding finding, + dissem_copy.dissemination_general gen where - ct.report_id = gen.report_id - and - (gen.is_public = true - or (gen.is_public = false and api_v1_1_0_functions.has_tribal_data_access())) - order by ct.id + finding.report_id = gen.report_id + order by finding.id ; --------------------------------------- --- notes_to_sefa +-- finding_text --------------------------------------- -create view api_v1_1_0.notes_to_sefa as +create view api_v1_1_0.findings_text as select gen.report_id, gen.auditee_uei, gen.audit_year, - --- - note.note_title as title, - note.accounting_policies, - note.is_minimis_rate_used, - note.rate_explained, - note.content, - note.contains_chart_or_table + ft.finding_ref_number, + ft.contains_chart_or_table, + ft.finding_text from - dissemination_general gen, - dissemination_note note + dissem_copy.dissemination_findingtext ft, + dissem_copy.dissemination_general gen where - note.report_id = gen.report_id - and + ft.report_id = gen.report_id + and (gen.is_public = true or (gen.is_public = false and api_v1_1_0_functions.has_tribal_data_access())) - order by note.id -; - ---------------------------------------- --- passthrough ---------------------------------------- -create view api_v1_1_0.passthrough as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - pass.award_reference, - pass.passthrough_id, - pass.passthrough_name - from - dissemination_general as gen, - dissemination_passthrough as pass - where - gen.report_id = pass.report_id - order by pass.id + order by ft.id ; - --------------------------------------- -- general --------------------------------------- @@ -253,10 +222,56 @@ create view api_v1_1_0.general as ELSE 'Yes' END AS is_secondary_auditors from - dissemination_general gen + dissem_copy.dissemination_general gen order by gen.id ; +--------------------------------------- +-- notes_to_sefa +--------------------------------------- +create view api_v1_1_0.notes_to_sefa as + select + gen.report_id, + gen.auditee_uei, + gen.audit_year, + --- + note.note_title as title, + note.accounting_policies, + note.is_minimis_rate_used, + note.rate_explained, + note.content, + note.contains_chart_or_table + from + dissem_copy.dissemination_general gen, + dissem_copy.dissemination_note note + where + note.report_id = gen.report_id + and + (gen.is_public = true + or (gen.is_public = false and api_v1_1_0_functions.has_tribal_data_access())) + order by note.id +; + +--------------------------------------- +-- passthrough +--------------------------------------- +create view api_v1_1_0.passthrough as + select + gen.report_id, + gen.auditee_uei, + 
gen.audit_year, + --- + pass.award_reference, + pass.passthrough_id, + pass.passthrough_name + from + dissem_copy.dissemination_general as gen, + dissem_copy.dissemination_passthrough as pass + where + gen.report_id = pass.report_id + order by pass.id +; + --------------------------------------- -- auditor (secondary auditor) --------------------------------------- @@ -277,30 +292,9 @@ create view api_v1_1_0.secondary_auditors as sa.address_state, sa.address_zipcode from - dissemination_General gen, - dissemination_SecondaryAuditor sa + dissem_copy.dissemination_General gen, + dissem_copy.dissemination_SecondaryAuditor sa where sa.report_id = gen.report_id order by sa.id ; - -create view api_v1_1_0.additional_eins as - select - gen.report_id, - gen.auditee_uei, - gen.audit_year, - --- - ein.additional_ein - from - dissemination_general gen, - dissemination_additionalein ein - where - gen.report_id = ein.report_id - order by ein.id -; - -commit; - -notify pgrst, - 'reload schema'; - diff --git a/backend/dissemination/sql/fac-snapshot-db/post/124_api_v1_1_0_rpc.sql b/backend/dissemination/sql/fac-snapshot-db/post/124_api_v1_1_0_rpc.sql new file mode 100644 index 0000000000..9067b8ef05 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/124_api_v1_1_0_rpc.sql @@ -0,0 +1,46 @@ +CREATE OR REPLACE FUNCTION api_v1_1_0.request_file_access( + report_id TEXT +) RETURNS JSON LANGUAGE plpgsql AS +$$ +DECLARE + v_uuid_header TEXT; + v_access_uuid VARCHAR(200); + v_key_exists BOOLEAN; + v_key_added_date DATE; +BEGIN + + SELECT api_v1_1_0_functions.get_api_key_uuid() INTO v_uuid_header; + + -- Check if the provided API key exists in dissem_copy.dissemination_TribalApiAccessKeyIds + SELECT + EXISTS( + SELECT 1 + FROM dissem_copy.dissemination_tribalapiaccesskeyids + WHERE key_id = v_uuid_header + ) INTO v_key_exists; + + + -- Get the added date of the key from dissem_copy.dissemination_TribalApiAccessKeyIds + SELECT date_added + INTO v_key_added_date + FROM dissem_copy.dissemination_tribalapiaccesskeyids + WHERE key_id = v_uuid_header; + + + -- Check if the key is less than 6 months old + IF v_uuid_header IS NOT NULL AND v_key_exists AND v_key_added_date >= CURRENT_DATE - INTERVAL '6 months' THEN + -- Generate UUID (using PostgreSQL's gen_random_uuid function) + SELECT gen_random_uuid() INTO v_access_uuid; + + -- Inserting data into the one_time_access table + INSERT INTO dissem_copy.dissemination_onetimeaccess (uuid, api_key_id, timestamp, report_id) + VALUES (v_access_uuid::UUID, v_uuid_header, CURRENT_TIMESTAMP, report_id); + + -- Return the UUID to the user + RETURN json_build_object('access_uuid', v_access_uuid); + ELSE + -- Return an error for unauthorized access + RETURN json_build_object('error', 'Unauthorized access or key older than 6 months')::JSON; + END IF; +END; +$$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/140_api_v2_0_0_gate.sql b/backend/dissemination/sql/fac-snapshot-db/post/140_api_v2_0_0_gate.sql new file mode 100644 index 0000000000..f66ceb2c55 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/140_api_v2_0_0_gate.sql @@ -0,0 +1,28 @@ +------------------------------------------------------------------ +-- GATE +------------------------------------------------------------------ +-- We only want the API to run if certain conditions are met. +-- We could try and encode that in the `bash` portion of the code. +-- Or, we could just gate things at the top of our SQL. +-- If the conditions are not met, we should exit noisily. 
+-- A cast to regclass will fail with an exception if the table +-- does not exist. +DO LANGUAGE plpgsql +$GATE$ + DECLARE + the_schema varchar := 'public_data_v1_0_0'; + the_table varchar := 'metadata'; + api_ver varchar := 'api_v2_0_0'; + BEGIN + IF EXISTS ( + SELECT FROM pg_tables + WHERE schemaname = the_schema + AND tablename = the_table + ) + THEN + RAISE info '% Gate condition met. Continuing.', api_ver; + ELSE + RAISE exception '% %.% not found.', api_ver, the_schema, the_table; + END IF; + END +$GATE$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/142_api_v2_0_0_functions.sql b/backend/dissemination/sql/fac-snapshot-db/post/142_api_v2_0_0_functions.sql new file mode 100644 index 0000000000..25445aa7c4 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/142_api_v2_0_0_functions.sql @@ -0,0 +1,53 @@ +----------------------------------------------------- +-- get_header +-- Reaches into the headers provided through PostgREST in order to +-- grab a particular value, keyed by the header key (e.g. "x-api-key") +----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v2_0_0_functions.get_header(item text) RETURNS TEXT + AS $get_header$ + DECLARE res TEXT; + BEGIN + SELECT (current_setting('request.headers', true)::json)->>item INTO res; + RETURN res; + END; +$get_header$ LANGUAGE plpgsql; + +----------------------------------------------------- +-- get_api_key_uuid +-- Uses the get_header function to grab the user id provided by api.data.gov +----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v2_0_0_functions.get_api_key_uuid() RETURNS TEXT +AS $gaku$ +DECLARE uuid TEXT; +BEGIN + SELECT api_v2_0_0_functions.get_header('x-api-user-id') INTO uuid; + RETURN uuid; +end; +$gaku$ LANGUAGE plpgsql; + +----------------------------------------------------- +-- has_tribal_data_access +-- Determines whether the key id in question has been granted +-- tribal data access. Required for accessing all of the suppressed tables. 
+----------------------------------------------------- +CREATE OR REPLACE FUNCTION api_v2_0_0_functions.has_tribal_data_access() +RETURNS BOOLEAN +AS $has_tribal_data_access$ +DECLARE + uuid_header UUID; + key_exists BOOLEAN; +BEGIN + SELECT api_v2_0_0_functions.get_api_key_uuid() INTO uuid_header; + SELECT + CASE WHEN EXISTS ( + SELECT key_id + FROM dissem_copy.dissemination_tribalapiaccesskeyids taaki + WHERE taaki.key_id = uuid_header::TEXT) + THEN 1::BOOLEAN + ELSE 0::BOOLEAN + END + INTO key_exists; + -- RAISE INFO 'api_v2_0_0 has_tribal % %', uuid_header, key_exists; + RETURN key_exists; +END; +$has_tribal_data_access$ LANGUAGE plpgsql; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/143_api_v2_0_0_views.sql b/backend/dissemination/sql/fac-snapshot-db/post/143_api_v2_0_0_views.sql new file mode 100644 index 0000000000..aac2b3c5c0 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/143_api_v2_0_0_views.sql @@ -0,0 +1,112 @@ +--------------------------------------- +-- additional_eins +--------------------------------------- +CREATE VIEW api_v2_0_0.additional_eins AS + SELECT * FROM public_data_v1_0_0.additional_eins ae + ORDER BY ae.id; + +--------------------------------------- +-- additional_ueis +--------------------------------------- +create view api_v2_0_0.additional_ueis AS + SELECT * FROM public_data_v1_0_0.additional_ueis au + ORDER BY au.id; + +--------------------------------------- +-- corrective_action_plan +--------------------------------------- +CREATE VIEW api_v2_0_0.corrective_action_plans AS + SELECT * FROM public_data_v1_0_0.corrective_action_plans cap + ORDER BY cap.id; + +--------------------------------------- +-- finding +--------------------------------------- +CREATE VIEW api_v2_0_0.findings as + SELECT * FROM public_data_v1_0_0.findings f + ORDER BY f.id; + +--------------------------------------- +-- finding_text +--------------------------------------- +CREATE VIEW api_v2_0_0.findings_text AS + SELECT * FROM public_data_v1_0_0.findings_text ft + ORDER BY ft.id; + +--------------------------------------- +-- federal award +--------------------------------------- +CREATE VIEW api_v2_0_0.federal_awards AS + SELECT * FROM public_data_v1_0_0.federal_awards fa + ORDER BY fa.id; + +--------------------------------------- +-- general +--------------------------------------- +CREATE VIEW api_v2_0_0.general AS + SELECT * FROM public_data_v1_0_0.general gen + ORDER BY gen.id; + +--------------------------------------- +-- notes_to_sefa +--------------------------------------- +CREATE VIEW api_v2_0_0.notes_to_sefa AS + SELECT * FROM public_data_v1_0_0.notes_to_sefa nts + ORDER BY nts.id; + +--------------------------------------- +-- passthrough +--------------------------------------- +CREATE VIEW api_v2_0_0.passthrough AS + SELECT * FROM public_data_v1_0_0.passthrough p + ORDER BY p.id; + +--------------------------------------- +-- auditor (secondary auditor) +--------------------------------------- +CREATE VIEW api_v2_0_0.secondary_auditors AS + SELECT * FROM public_data_v1_0_0.secondary_auditors sa + ORDER BY sa.id; + +--------------------------------------- +-- combined +--------------------------------------- +CREATE VIEW api_v2_0_0.combined AS + SELECT * FROM public_data_v1_0_0.combined comb + ; + +--------------------------------------- +-- metadata +--------------------------------------- +CREATE VIEW api_v2_0_0.metadata AS + SELECT * FROM public_data_v1_0_0.metadata; + 
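-- (Usage sketch, not part of this changeset: the view inherits the `table` and
--  `count` aliases defined in 030_public_data_v1_0_0_metadata.sql, so a quick
--  post-refresh smoke test of per-table row counts could be:
--    SELECT "table", count FROM api_v2_0_0.metadata ORDER BY "table";
--  )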
+------------------------------------------------------------------------------ +-- SUPPRESSED ENDPOINTS +-- These require an API key that has tribal/suppressed data access. +------------------------------------------------------------------------------ + +--------------------------------------- +-- suppressed_corrective_action_plan +--------------------------------------- +CREATE VIEW api_v2_0_0.suppressed_corrective_action_plans AS + SELECT * FROM suppressed_data_v1_0_0.corrective_action_plans cap + WHERE api_v2_0_0_functions.has_tribal_data_access() + ORDER BY cap.id; + + +--------------------------------------- +-- suppressed_finding_text +--------------------------------------- +CREATE VIEW api_v2_0_0.suppressed_findings_text AS + SELECT * FROM suppressed_data_v1_0_0.findings_text ft + WHERE api_v2_0_0_functions.has_tribal_data_access() + ORDER BY ft.id; + +--------------------------------------- +-- suppressed_notes_to_sefa +--------------------------------------- +CREATE VIEW api_v2_0_0.suppressed_notes_to_sefa AS + SELECT * FROM suppressed_data_v1_0_0.notes_to_sefa nts + WHERE api_v2_0_0_functions.has_tribal_data_access() + ORDER BY nts.id; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/144_api_v2_0_0_rpc.sql b/backend/dissemination/sql/fac-snapshot-db/post/144_api_v2_0_0_rpc.sql new file mode 100644 index 0000000000..afa8337fec --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/144_api_v2_0_0_rpc.sql @@ -0,0 +1,46 @@ +CREATE OR REPLACE FUNCTION api_v2_0_0.request_file_access( + report_id TEXT +) RETURNS JSON LANGUAGE plpgsql AS +$$ +DECLARE + v_uuid_header TEXT; + v_access_uuid VARCHAR(200); + v_key_exists BOOLEAN; + v_key_added_date DATE; +BEGIN + + SELECT api_v2_0_0_functions.get_api_key_uuid() INTO v_uuid_header; + + -- Check if the provided API key exists in dissem_copy.dissemination_TribalApiAccessKeyIds + SELECT + EXISTS( + SELECT 1 + FROM dissem_copy.dissemination_tribalapiaccesskeyids + WHERE key_id = v_uuid_header + ) INTO v_key_exists; + + + -- Get the added date of the key from dissem_copy.dissemination_TribalApiAccessKeyIds + SELECT date_added + INTO v_key_added_date + FROM dissem_copy.dissemination_tribalapiaccesskeyids + WHERE key_id = v_uuid_header; + + + -- Check if the key is less than 6 months old + IF v_uuid_header IS NOT NULL AND v_key_exists AND v_key_added_date >= CURRENT_DATE - INTERVAL '6 months' THEN + -- Generate UUID (using PostgreSQL's gen_random_uuid function) + SELECT gen_random_uuid() INTO v_access_uuid; + + -- Inserting data into the one_time_access table + INSERT INTO dissem_copy.dissemination_onetimeaccess (uuid, api_key_id, timestamp, report_id) + VALUES (v_access_uuid::UUID, v_uuid_header, CURRENT_TIMESTAMP, report_id); + + -- Return the UUID to the user + RETURN json_build_object('access_uuid', v_access_uuid); + ELSE + -- Return an error for unauthorized access + RETURN json_build_object('error', 'Unauthorized access or key older than 6 months')::JSON; + END IF; +END; +$$; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/800_permissions.sql b/backend/dissemination/sql/fac-snapshot-db/post/800_permissions.sql new file mode 100644 index 0000000000..5778fc75bc --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/800_permissions.sql @@ -0,0 +1,26 @@ +-- This grants access to the tables and views that were created +-- to the API server. 
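-- (Verification sketch, assuming the grants below have been applied: Postgres's
--  built-in privilege functions can confirm the API role's read access, e.g.
--    SELECT has_schema_privilege('api_fac_gov', 'api_v2_0_0', 'USAGE'),
--           has_table_privilege('api_fac_gov', 'api_v2_0_0.general', 'SELECT');
--  )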
+ +-- Decommissioned 20241024 +-- GRANT SELECT ON ALL TABLES IN SCHEMA api_v1_0_3 TO api_fac_gov; + +----------------------------------------------------- +-- api_v1_1_0 PERMISSIONS +----------------------------------------------------- +GRANT USAGE ON SCHEMA api_v1_1_0_functions TO api_fac_gov; +GRANT USAGE ON SCHEMA api_v1_1_0 TO api_fac_gov; +GRANT SELECT ON ALL TABLES IN SCHEMA api_v1_1_0 TO api_fac_gov; +-- Need these for tribal access checks -- functions and tables +GRANT SELECT ON ALL TABLES IN SCHEMA dissem_copy to api_fac_gov; +GRANT USAGE ON SCHEMA dissem_copy to api_fac_gov; + +-- There are no sequences currently on api_v1_1_0 +-- GRANT SELECT, USAGE ON ALL SEQUENCES IN SCHEMA api_v1_1_0 TO api_fac_gov; + +----------------------------------------------------- +-- api_v2_0_0 PERMISSIONS +----------------------------------------------------- +GRANT USAGE ON SCHEMA api_v2_0_0_functions TO api_fac_gov; +GRANT USAGE ON SCHEMA api_v2_0_0 TO api_fac_gov; +GRANT SELECT ON ALL TABLES IN SCHEMA api_v2_0_0 TO api_fac_gov; +GRANT SELECT, USAGE ON ALL SEQUENCES IN SCHEMA api_v2_0_0 TO api_fac_gov; diff --git a/backend/dissemination/sql/fac-snapshot-db/post/950_indexes_additional_eins.sql b/backend/dissemination/sql/fac-snapshot-db/post/950_indexes_additional_eins.sql new file mode 100644 index 0000000000..4530e7b00a --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/950_indexes_additional_eins.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on additional_eins +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_additional_eins_report_id_idx + on public_data_v1_0_0.additional_eins (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_additional_eins_report_id_fad + on public_data_v1_0_0.additional_eins (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_additional_eins + ON public_data_v1_0_0.additional_eins (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/951_indexes_additional_ueis.sql b/backend/dissemination/sql/fac-snapshot-db/post/951_indexes_additional_ueis.sql new file mode 100644 index 0000000000..7316d74a2e --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/951_indexes_additional_ueis.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on additional_ueis +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_additional_ueis_report_id_idx + on public_data_v1_0_0.additional_ueis (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_additional_ueis_report_id_fad + on public_data_v1_0_0.additional_ueis (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_additional_ueis + ON public_data_v1_0_0.additional_ueis (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/952_indexes_combined.sql b/backend/dissemination/sql/fac-snapshot-db/post/952_indexes_combined.sql new file mode 100644 index 0000000000..aa8771955c --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/952_indexes_combined.sql @@ -0,0 +1,78 @@ + +--------------------------------------- +-- INDEXES on combined +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_report_id_idx + on public_data_v1_0_0.combined (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_report_id_fad + on public_data_v1_0_0.combined (report_id, fac_accepted_date); + +-- Some of these may be redundant? 
Not sure how indexes overlap (or don't). +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_fad_aln + on public_data_v1_0_0.combined (fac_accepted_date, aln); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_fad_agency + on public_data_v1_0_0.combined (fac_accepted_date, federal_agency_prefix); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_fad_agency_ext + on public_data_v1_0_0.combined (fac_accepted_date, federal_agency_prefix, federal_award_extension); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_report_id_fad_aln + on public_data_v1_0_0.combined (report_id, fac_accepted_date, aln); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_report_id_fad_agency_ext + on public_data_v1_0_0.combined (report_id, federal_agency_prefix, federal_award_extension); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditee_certify_name_idx + ON public_data_v1_0_0.combined + ((lower(auditee_certify_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditee_name_idx + ON public_data_v1_0_0.combined + ((lower(auditee_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditor_certify_name_idx + ON public_data_v1_0_0.combined + ((lower(auditor_certify_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditor_contact_name_idx + ON public_data_v1_0_0.combined + ((lower(auditor_contact_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditor_firm_name_idx + ON public_data_v1_0_0.combined + ((lower(auditor_firm_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditee_email_idx + on public_data_v1_0_0.combined ((lower(auditee_email))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditor_email_idx + on public_data_v1_0_0.combined ((lower(auditor_email))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_start_date_idx + ON public_data_v1_0_0.combined (fy_start_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_end_date_idx + ON public_data_v1_0_0.combined (fy_end_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditee_uei_idx + ON public_data_v1_0_0.combined (auditee_uei); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_auditee_ein_idx + ON public_data_v1_0_0.combined (auditee_ein); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_federal_agency_prefix_idx + on public_data_v1_0_0.combined (federal_agency_prefix); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_federal_award_extension_idx + on public_data_v1_0_0.combined (federal_award_extension); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_audit_year_idx + on public_data_v1_0_0.combined (audit_year); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_combined_aln_idx + on public_data_v1_0_0.combined (aln); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_combined + ON public_data_v1_0_0.combined (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/953_indexes_corrective_action_plans.sql b/backend/dissemination/sql/fac-snapshot-db/post/953_indexes_corrective_action_plans.sql new file mode 100644 index 0000000000..65174c5e5d --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/953_indexes_corrective_action_plans.sql @@ -0,0 +1,14 @@ +--------------------------------------- +-- INDEXES on corrective_action_plans +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_corrective_action_plans_report_id_idx + on public_data_v1_0_0.corrective_action_plans (report_id); + +CREATE INDEX IF NOT EXISTS 
fac_snapshot_db_corrective_action_plans_report_id_fad_idx + on public_data_v1_0_0.corrective_action_plans (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_corrective_action_plans_report_id_fad_frn_idx + on public_data_v1_0_0.corrective_action_plans (report_id, fac_accepted_date, finding_ref_number); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_corrective_action_plans_idx + ON public_data_v1_0_0.corrective_action_plans (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/954_indexes_federal_awards.sql b/backend/dissemination/sql/fac-snapshot-db/post/954_indexes_federal_awards.sql new file mode 100644 index 0000000000..a01526bd6f --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/954_indexes_federal_awards.sql @@ -0,0 +1,38 @@ +--------------------------------------- +-- INDEXES on federal_awards +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_id_idx + on public_data_v1_0_0.federal_awards (id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_report_id_idx + on public_data_v1_0_0.federal_awards (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_report_id_fad + on public_data_v1_0_0.federal_awards (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_rid_fap + ON public_data_v1_0_0.federal_awards (report_id, federal_agency_prefix); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_rid_fap_fext + ON public_data_v1_0_0.federal_awards (report_id, federal_agency_prefix, federal_award_extension); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_rid_fap_fext_fad + ON public_data_v1_0_0.federal_awards (report_id, federal_agency_prefix, federal_award_extension, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_fap_fext_cnt + ON public_data_v1_0_0.federal_awards (federal_agency_prefix,federal_award_extension,findings_count); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_rid_aln + ON public_data_v1_0_0.federal_awards (report_id, aln); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_rid_aln_fad + ON public_data_v1_0_0.federal_awards (report_id, aln, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_awards_aln_cnt + ON public_data_v1_0_0.federal_awards (aln,findings_count); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_fa + ON public_data_v1_0_0.federal_awards (batch_number); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_ay + ON public_data_v1_0_0.federal_awards (audit_year); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/955_indexes_findings.sql b/backend/dissemination/sql/fac-snapshot-db/post/955_indexes_findings.sql new file mode 100644 index 0000000000..f01f7f2b56 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/955_indexes_findings.sql @@ -0,0 +1,19 @@ +--------------------------------------- +-- INDEXES on findings +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_report_id_idx + on public_data_v1_0_0.findings (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_report_id_fad + on public_data_v1_0_0.findings (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_rid_aref + on public_data_v1_0_0.findings (report_id, award_reference); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_rid_aref_fad + on public_data_v1_0_0.findings (report_id, 
award_reference, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_findings_idx + ON public_data_v1_0_0.findings (batch_number); + + diff --git a/backend/dissemination/sql/fac-snapshot-db/post/956_indexes_findings_text.sql b/backend/dissemination/sql/fac-snapshot-db/post/956_indexes_findings_text.sql new file mode 100644 index 0000000000..fc1cabde72 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/956_indexes_findings_text.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on findings_text +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_text_report_id_idx + on public_data_v1_0_0.findings_text (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_findings_text_report_id_fad + on public_data_v1_0_0.findings_text (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_findings_text_idx + ON public_data_v1_0_0.findings_text (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/957_indexes_general.sql b/backend/dissemination/sql/fac-snapshot-db/post/957_indexes_general.sql new file mode 100644 index 0000000000..f4e819d109 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/957_indexes_general.sql @@ -0,0 +1,56 @@ + +--------------------------------------- +-- INDEXES on general +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_report_id_idx + on public_data_v1_0_0.general (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_report_id_fad + on public_data_v1_0_0.general (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_report_id_fad + on public_data_v1_0_0.general (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditee_certify_name_idx + ON public_data_v1_0_0.general + ((lower(auditee_certify_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditee_name_idx + ON public_data_v1_0_0.general + ((lower(auditee_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditor_certify_name_idx + ON public_data_v1_0_0.general + ((lower(auditor_certify_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditor_contact_name_idx + ON public_data_v1_0_0.general + ((lower(auditor_contact_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditor_firm_name_idx + ON public_data_v1_0_0.general + ((lower(auditor_firm_name))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditee_email_idx + on public_data_v1_0_0.general ((lower(auditee_email))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditor_email_idx + on public_data_v1_0_0.general ((lower(auditor_email))); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_start_date_idx + ON public_data_v1_0_0.general (fy_start_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_end_date_idx + ON public_data_v1_0_0.general (fy_end_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditee_uei_idx + ON public_data_v1_0_0.general (auditee_uei); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_auditee_ein_idx + ON public_data_v1_0_0.general (auditee_ein); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_general_audit_year_idx + on public_data_v1_0_0.general (audit_year); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_general_idx + ON public_data_v1_0_0.general (batch_number); diff --git 
a/backend/dissemination/sql/fac-snapshot-db/post/958_indexes_notes_to_sefa.sql b/backend/dissemination/sql/fac-snapshot-db/post/958_indexes_notes_to_sefa.sql new file mode 100644 index 0000000000..74649b828c --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/958_indexes_notes_to_sefa.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on notes_to_sefa +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_notes_to_sefa_report_id_idx + on public_data_v1_0_0.notes_to_sefa (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_notes_to_sefa_report_id_fad + on public_data_v1_0_0.notes_to_sefa (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_notes_to_sefa_idx + ON public_data_v1_0_0.notes_to_sefa (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/959_indexes_passthrough.sql b/backend/dissemination/sql/fac-snapshot-db/post/959_indexes_passthrough.sql new file mode 100644 index 0000000000..7d156e2fe8 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/959_indexes_passthrough.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on passthrough +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_passthrough_report_id_idx + on public_data_v1_0_0.passthrough (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_passthrough_report_id_fad + on public_data_v1_0_0.passthrough (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_passthrough_idx + ON public_data_v1_0_0.passthrough (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/960_indexes_secondary_auditors.sql b/backend/dissemination/sql/fac-snapshot-db/post/960_indexes_secondary_auditors.sql new file mode 100644 index 0000000000..30684072ea --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/960_indexes_secondary_auditors.sql @@ -0,0 +1,11 @@ +--------------------------------------- +-- INDEXES on secondary_auditors +--------------------------------------- +CREATE INDEX IF NOT EXISTS fac_snapshot_db_secondary_auditors_report_id_idx + on public_data_v1_0_0.secondary_auditors (report_id); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_secondary_auditors_report_id_fad_idx + on public_data_v1_0_0.secondary_auditors (report_id, fac_accepted_date); + +CREATE INDEX IF NOT EXISTS fac_snapshot_db_federal_batch_secondary_auditors_idx + ON public_data_v1_0_0.secondary_auditors (batch_number); diff --git a/backend/dissemination/sql/fac-snapshot-db/post/999_finalize.sql b/backend/dissemination/sql/fac-snapshot-db/post/999_finalize.sql new file mode 100644 index 0000000000..9261088b61 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/post/999_finalize.sql @@ -0,0 +1,4 @@ +-- PostgREST likes to know when the schemas and things +-- attached to them change. 
+NOTIFY pgrst, 'reload schema'; + diff --git a/backend/dissemination/sql/fac-snapshot-db/pre/010_roles.sql b/backend/dissemination/sql/fac-snapshot-db/pre/010_roles.sql new file mode 100644 index 0000000000..0847b0ee0a --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/pre/010_roles.sql @@ -0,0 +1,35 @@ +------------------------------------------------------------------ +-- authenticator role +------------------------------------------------------------------ +DO +$do$ +BEGIN + IF EXISTS ( + SELECT FROM pg_catalog.pg_roles + WHERE rolname = 'authenticator') THEN + RAISE NOTICE 'ROLES Role "authenticator" already exists. Skipping.'; + ELSE + CREATE ROLE authenticator + LOGIN NOINHERIT NOCREATEDB + NOCREATEROLE NOSUPERUSER; + END IF; +END +$do$; + +------------------------------------------------------------------ +-- api_fac_gov role +------------------------------------------------------------------ +DO +$do$ +BEGIN + IF EXISTS ( + SELECT FROM pg_catalog.pg_roles + WHERE rolname = 'api_fac_gov') THEN + RAISE NOTICE 'ROLES Role "api_fac_gov" already exists. Skipping.'; + ELSE + CREATE ROLE api_fac_gov NOLOGIN; + END IF; +END +$do$; + +GRANT api_fac_gov TO authenticator; diff --git a/backend/dissemination/sql/fac-snapshot-db/pre/060_schemas.sql b/backend/dissemination/sql/fac-snapshot-db/pre/060_schemas.sql new file mode 100644 index 0000000000..d1cf79a750 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/pre/060_schemas.sql @@ -0,0 +1,51 @@ +--------------------------------- +-- DROP +--------------------------------- +DROP SCHEMA IF EXISTS api_v1_0_3 CASCADE; +DROP SCHEMA IF EXISTS api_v1_1_0 CASCADE; +DROP SCHEMA IF EXISTS api_v1_1_1 CASCADE; + +DROP SCHEMA IF EXISTS admin_api_v1_1_0 CASCADE; +DROP SCHEMA IF EXISTS admin_api_v1_1_0_functions CASCADE; + +DROP SCHEMA IF EXISTS admin_api_v1_1_1 CASCADE; +DROP SCHEMA IF EXISTS admin_api_v1_1_1_functions CASCADE; + +DROP SCHEMA IF EXISTS api_v2_0_0 CASCADE; +DROP SCHEMA IF EXISTS api_v2_0_0_functions CASCADE; + +------------- +-- ALWAYS +-- This is the start of the pipeline. +-- It is a copy of the backed up dissemination tables in +-- fac-snapshot-db. Always drop the schema and make a new copy. 
+------------- +DROP SCHEMA IF EXISTS dissem_copy CASCADE; +DROP SCHEMA IF EXISTS public_data_v1_0_0 CASCADE; +DROP SCHEMA IF EXISTS public_data_v1_0_0_functions CASCADE; +DROP SCHEMA IF EXISTS suppressed_data_v1_0_0 CASCADE; +DROP SCHEMA IF EXISTS suppressed_data_v1_0_0_functions CASCADE; + +--------------------------------- +-- CREATE +--------------------------------- +-- Retired 20241024 +-- CREATE SCHEMA IF NOT EXISTS api_v1_0_3; +-- CREATE SCHEMA IF NOT EXISTS api_v1_0_3_functions; + +CREATE SCHEMA IF NOT EXISTS api_v1_1_0; +CREATE SCHEMA IF NOT EXISTS api_v1_1_0_functions; + +-- Retired 20241024 +-- CREATE SCHEMA IF NOT EXISTS admin_api_v1_1_0; +-- CREATE SCHEMA IF NOT EXISTS admin_api_v1_1_0_functions; + +CREATE SCHEMA IF NOT EXISTS public_data_v1_0_0; +CREATE SCHEMA IF NOT EXISTS public_data_v1_0_0_functions; +CREATE SCHEMA IF NOT EXISTS suppressed_data_v1_0_0; +CREATE SCHEMA IF NOT EXISTS suppressed_data_v1_0_0_functions; +CREATE SCHEMA IF NOT EXISTS dissem_copy; + +CREATE SCHEMA IF NOT EXISTS api_v2_0_0; +CREATE SCHEMA IF NOT EXISTS api_v2_0_0_functions; + diff --git a/backend/dissemination/sql/fac-snapshot-db/pre/090_sequences.sql b/backend/dissemination/sql/fac-snapshot-db/pre/090_sequences.sql new file mode 100644 index 0000000000..0a2b124878 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/pre/090_sequences.sql @@ -0,0 +1,47 @@ +-- Sequences are used to provide a foundation for batching, which +-- is needed for fast download of data. +---------------------------------------------------------- +-- PUBLIC DATA TABLES +---------------------------------------------------------- +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_additional_eins; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_additional_ueis; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_combined; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_corrective_action_plans; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_federal_awards; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_findings_text; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_findings; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_general; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_notes_to_sefa; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_passthrough; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_secondary_auditors; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_migration_inspection_record; +DROP SEQUENCE IF EXISTS public_data_v1_0_0.seq_invalid_audit_record; + +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_additional_eins START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_additional_ueis START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_combined START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_corrective_action_plans START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_federal_awards START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_findings_text START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_findings START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_general START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_notes_to_sefa START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_passthrough START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_secondary_auditors START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_migration_inspection_record START 1; +CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_invalid_audit_record START 1; + 
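A minimal sketch of how these sequences feed the batching scheme, assuming the DIV(seq, 20000) convention applied by the sling post_sql steps later in this changeset: each row copied into a public_data_v1_0_0 table is stamped with the next value from its table's sequence, a batch_number is derived from that value, and clients can then pull a large table down one batch of roughly 20,000 rows at a time.

-- Sketch only; the names below follow the conventions used elsewhere in this changeset.
-- In the per-row copy:  NEXTVAL('public_data_v1_0_0.seq_general') AS seq
-- After the copy:       ALTER TABLE public_data_v1_0_0.general ADD COLUMN batch_number INTEGER;
--                       UPDATE public_data_v1_0_0.general SET batch_number = DIV(seq, 20000);
-- Client-side paging:   GET {{scheme}}://{{apiUrl}}/federal_awards?batch_number=eq.200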
+---------------------------------------------------------- +-- SUPPRESSED DATA TABLES +---------------------------------------------------------- +DROP SEQUENCE IF EXISTS suppressed_data_v1_0_0.seq_corrective_action_plans; +DROP SEQUENCE IF EXISTS suppressed_data_v1_0_0.seq_findings_text; +DROP SEQUENCE IF EXISTS suppressed_data_v1_0_0.seq_notes_to_sefa; +DROP SEQUENCE IF EXISTS suppressed_data_v1_0_0.seq_migration_inspection_record; +DROP SEQUENCE IF EXISTS suppressed_data_v1_0_0.seq_invalid_audit_record; + +CREATE SEQUENCE IF NOT EXISTS suppressed_data_v1_0_0.seq_corrective_action_plans START 1; +CREATE SEQUENCE IF NOT EXISTS suppressed_data_v1_0_0.seq_findings_text START 1; +CREATE SEQUENCE IF NOT EXISTS suppressed_data_v1_0_0.seq_notes_to_sefa START 1; +CREATE SEQUENCE IF NOT EXISTS suppressed_data_v1_0_0.seq_migration_inspection_record START 1; +CREATE SEQUENCE IF NOT EXISTS suppressed_data_v1_0_0.seq_invalid_audit_record START 1; diff --git a/backend/dissemination/sql/fac-snapshot-db/pre/999_finalize.sql b/backend/dissemination/sql/fac-snapshot-db/pre/999_finalize.sql new file mode 100644 index 0000000000..d1168e7a39 --- /dev/null +++ b/backend/dissemination/sql/fac-snapshot-db/pre/999_finalize.sql @@ -0,0 +1,3 @@ +-- PostgREST likes to know when the schemas and things +-- attached to them change. +NOTIFY pgrst, 'reload schema'; diff --git a/backend/dissemination/sql/public.rest b/backend/dissemination/sql/public.rest new file mode 100644 index 0000000000..54a57f049f --- /dev/null +++ b/backend/dissemination/sql/public.rest @@ -0,0 +1,53 @@ +### +GET {{scheme}}://{{apiUrl}}/general?limit=20000&offset=100000 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: {{acceptProfile}} +Accept: application/vnd.pgrst.plan + +### +GET {{scheme}}://{{apiUrl}}/general?report_id=eq.2021-12-CENSUS-0000250449 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v1_1_0 +Accept: application/vnd.pgrst.plan + +### +GET {{scheme}}://{{apiUrl}}/general?report_id=eq.2021-12-CENSUS-0000250449 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 +Accept: application/vnd.pgrst.plan + +### +GET {{scheme}}://{{apiUrl}}/federal_awards?batch_number=eq.200 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 +Accept: application/vnd.pgrst.plan + + +### +GET {{scheme}}://{{apiUrl}}/federal_awards?batch_number=eq.200&limit=1 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 + +### +GET {{scheme}}://{{apiUrl}}/combined?report_id=eq.2021-12-CENSUS-0000250449 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 +Accept: application/vnd.pgrst.plan + +### +GET {{scheme}}://{{apiUrl}}/rpc/get_batch_federal_awards?_batch=3&limit=10 +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 + +### +GET {{scheme}}://{{apiUrl}}/metadata +authorization: {{authorization}} +x-api-user-id: {{xApiUserId}} +accept-profile: api_v2_0_0 diff --git a/backend/dissemination/sql/refresh_materialized_views.sql b/backend/dissemination/sql/refresh_materialized_views.sql deleted file mode 100644 index 4911e13ed0..0000000000 --- a/backend/dissemination/sql/refresh_materialized_views.sql +++ /dev/null @@ -1,3 +0,0 @@ -REFRESH MATERIALIZED VIEW dissemination_combined; - -REFRESH MATERIALIZED VIEW census_gsa_crosswalk; \ No newline at end of file diff --git 
a/backend/dissemination/sql/sling/bulk_data_export/create_stream_yaml.py b/backend/dissemination/sql/sling/bulk_data_export/create_stream_yaml.py new file mode 100644 index 0000000000..8b07ec32a3 --- /dev/null +++ b/backend/dissemination/sql/sling/bulk_data_export/create_stream_yaml.py @@ -0,0 +1,64 @@ +# Run this as + +# python create_stream_yaml.py > public_data_v1_0_0_single_csv.yaml + +# to regenerate the sling file. + +import yaml +from datetime import datetime + +obj: dict = { + "streams": {}, + "source": "FAC_SNAPSHOT_URI", + "target": "BULK_DATA_EXPORT", + "defaults": { + "target_options": { + "format": "csv", + "compression": "gzip", + "file_max_rows": 0, + } + }, +} + +SCHEMA = "public_data_v1_0_0" + +TABLES = [ + "additional_eins", + "additional_ueis", + "combined", + "corrective_action_plans", + "federal_awards", + "findings", + "findings_text", + "general", + "notes_to_sefa", + "passthrough", + "secondary_auditors", +] + +# Although this attempts to generate files all the way out to 2030, +# it will not generate anything where data does not exist. +# This future-proofs us for a year or two, so we don't have to worry +# about updating the `sling` script that is created. +YEARS = range(2016, 2031) + + +for t in TABLES: + ndx = 0 + for y in YEARS: + obj["streams"][f"{SCHEMA}.{t}.{ndx}"] = { + "object": f"bulk_export/{{MM}}/{y}_{t}.csv", + "sql": f"SELECT * FROM {SCHEMA}.{t} WHERE audit_year = '{y}'", + "mode": "full-refresh", + "target_options": { + "format": "csv", + }, + } + ndx += 1 + +today = datetime.today().strftime("%Y-%m-%d") +print("# DO NOT EDIT; THIS IS A GENERATED FILE") +print("# python create_stream_yaml.py > public_data_v1_0_0_single_csv.yaml") +print(f"# Last generated {today}") +print() +print(yaml.dump(obj)) diff --git a/backend/dissemination/sql/sling/bulk_data_export/public_data_v1_0_0_single_csv.yaml b/backend/dissemination/sql/sling/bulk_data_export/public_data_v1_0_0_single_csv.yaml new file mode 100644 index 0000000000..0f4986797a --- /dev/null +++ b/backend/dissemination/sql/sling/bulk_data_export/public_data_v1_0_0_single_csv.yaml @@ -0,0 +1,1018 @@ +# DO NOT EDIT; THIS IS A GENERATED FILE +# python create_stream_yaml.py > public_data_v1_0_0_single_csv.yaml +# Last generated 2024-10-25 + +defaults: + target_options: + compression: gzip + file_max_rows: 0 + format: csv +source: FAC_SNAPSHOT_URI +streams: + public_data_v1_0_0.additional_eins.0: + mode: full-refresh + object: bulk_export/{MM}/2016_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.additional_eins.1: + mode: full-refresh + object: bulk_export/{MM}/2017_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.additional_eins.10: + mode: full-refresh + object: bulk_export/{MM}/2026_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.additional_eins.11: + mode: full-refresh + object: bulk_export/{MM}/2027_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.additional_eins.12: + mode: full-refresh + object: bulk_export/{MM}/2028_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2028' + target_options: + format: csv + 
public_data_v1_0_0.additional_eins.13: + mode: full-refresh + object: bulk_export/{MM}/2029_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.additional_eins.14: + mode: full-refresh + object: bulk_export/{MM}/2030_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.additional_eins.2: + mode: full-refresh + object: bulk_export/{MM}/2018_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.additional_eins.3: + mode: full-refresh + object: bulk_export/{MM}/2019_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.additional_eins.4: + mode: full-refresh + object: bulk_export/{MM}/2020_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.additional_eins.5: + mode: full-refresh + object: bulk_export/{MM}/2021_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.additional_eins.6: + mode: full-refresh + object: bulk_export/{MM}/2022_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.additional_eins.7: + mode: full-refresh + object: bulk_export/{MM}/2023_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.additional_eins.8: + mode: full-refresh + object: bulk_export/{MM}/2024_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.additional_eins.9: + mode: full-refresh + object: bulk_export/{MM}/2025_additional_eins.csv + sql: SELECT * FROM public_data_v1_0_0.additional_eins WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.0: + mode: full-refresh + object: bulk_export/{MM}/2016_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.1: + mode: full-refresh + object: bulk_export/{MM}/2017_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.10: + mode: full-refresh + object: bulk_export/{MM}/2026_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.11: + mode: full-refresh + object: bulk_export/{MM}/2027_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.12: + mode: full-refresh + object: bulk_export/{MM}/2028_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.13: + mode: full-refresh + object: bulk_export/{MM}/2029_additional_ueis.csv + sql: SELECT * FROM 
public_data_v1_0_0.additional_ueis WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.14: + mode: full-refresh + object: bulk_export/{MM}/2030_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.2: + mode: full-refresh + object: bulk_export/{MM}/2018_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.3: + mode: full-refresh + object: bulk_export/{MM}/2019_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.4: + mode: full-refresh + object: bulk_export/{MM}/2020_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.5: + mode: full-refresh + object: bulk_export/{MM}/2021_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.6: + mode: full-refresh + object: bulk_export/{MM}/2022_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.7: + mode: full-refresh + object: bulk_export/{MM}/2023_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.8: + mode: full-refresh + object: bulk_export/{MM}/2024_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.additional_ueis.9: + mode: full-refresh + object: bulk_export/{MM}/2025_additional_ueis.csv + sql: SELECT * FROM public_data_v1_0_0.additional_ueis WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.combined.0: + mode: full-refresh + object: bulk_export/{MM}/2016_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.combined.1: + mode: full-refresh + object: bulk_export/{MM}/2017_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.combined.10: + mode: full-refresh + object: bulk_export/{MM}/2026_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.combined.11: + mode: full-refresh + object: bulk_export/{MM}/2027_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.combined.12: + mode: full-refresh + object: bulk_export/{MM}/2028_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.combined.13: + mode: full-refresh + object: bulk_export/{MM}/2029_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.combined.14: + mode: full-refresh + object: bulk_export/{MM}/2030_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2030' + 
target_options: + format: csv + public_data_v1_0_0.combined.2: + mode: full-refresh + object: bulk_export/{MM}/2018_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.combined.3: + mode: full-refresh + object: bulk_export/{MM}/2019_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.combined.4: + mode: full-refresh + object: bulk_export/{MM}/2020_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.combined.5: + mode: full-refresh + object: bulk_export/{MM}/2021_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.combined.6: + mode: full-refresh + object: bulk_export/{MM}/2022_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.combined.7: + mode: full-refresh + object: bulk_export/{MM}/2023_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.combined.8: + mode: full-refresh + object: bulk_export/{MM}/2024_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.combined.9: + mode: full-refresh + object: bulk_export/{MM}/2025_combined.csv + sql: SELECT * FROM public_data_v1_0_0.combined WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.0: + mode: full-refresh + object: bulk_export/{MM}/2016_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2016' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.1: + mode: full-refresh + object: bulk_export/{MM}/2017_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2017' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.10: + mode: full-refresh + object: bulk_export/{MM}/2026_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2026' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.11: + mode: full-refresh + object: bulk_export/{MM}/2027_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2027' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.12: + mode: full-refresh + object: bulk_export/{MM}/2028_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2028' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.13: + mode: full-refresh + object: bulk_export/{MM}/2029_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2029' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.14: + mode: full-refresh + object: bulk_export/{MM}/2030_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2030' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.2: + mode: full-refresh + object: 
bulk_export/{MM}/2018_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2018' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.3: + mode: full-refresh + object: bulk_export/{MM}/2019_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2019' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.4: + mode: full-refresh + object: bulk_export/{MM}/2020_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2020' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.5: + mode: full-refresh + object: bulk_export/{MM}/2021_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2021' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.6: + mode: full-refresh + object: bulk_export/{MM}/2022_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2022' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.7: + mode: full-refresh + object: bulk_export/{MM}/2023_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2023' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.8: + mode: full-refresh + object: bulk_export/{MM}/2024_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2024' + target_options: + format: csv + public_data_v1_0_0.corrective_action_plans.9: + mode: full-refresh + object: bulk_export/{MM}/2025_corrective_action_plans.csv + sql: SELECT * FROM public_data_v1_0_0.corrective_action_plans WHERE audit_year + = '2025' + target_options: + format: csv + public_data_v1_0_0.federal_awards.0: + mode: full-refresh + object: bulk_export/{MM}/2016_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.federal_awards.1: + mode: full-refresh + object: bulk_export/{MM}/2017_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.federal_awards.10: + mode: full-refresh + object: bulk_export/{MM}/2026_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.federal_awards.11: + mode: full-refresh + object: bulk_export/{MM}/2027_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.federal_awards.12: + mode: full-refresh + object: bulk_export/{MM}/2028_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.federal_awards.13: + mode: full-refresh + object: bulk_export/{MM}/2029_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.federal_awards.14: + mode: full-refresh + object: bulk_export/{MM}/2030_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2030' + target_options: + format: csv + 
public_data_v1_0_0.federal_awards.2: + mode: full-refresh + object: bulk_export/{MM}/2018_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.federal_awards.3: + mode: full-refresh + object: bulk_export/{MM}/2019_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.federal_awards.4: + mode: full-refresh + object: bulk_export/{MM}/2020_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.federal_awards.5: + mode: full-refresh + object: bulk_export/{MM}/2021_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.federal_awards.6: + mode: full-refresh + object: bulk_export/{MM}/2022_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.federal_awards.7: + mode: full-refresh + object: bulk_export/{MM}/2023_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.federal_awards.8: + mode: full-refresh + object: bulk_export/{MM}/2024_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.federal_awards.9: + mode: full-refresh + object: bulk_export/{MM}/2025_federal_awards.csv + sql: SELECT * FROM public_data_v1_0_0.federal_awards WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.findings.0: + mode: full-refresh + object: bulk_export/{MM}/2016_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.findings.1: + mode: full-refresh + object: bulk_export/{MM}/2017_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.findings.10: + mode: full-refresh + object: bulk_export/{MM}/2026_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.findings.11: + mode: full-refresh + object: bulk_export/{MM}/2027_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.findings.12: + mode: full-refresh + object: bulk_export/{MM}/2028_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.findings.13: + mode: full-refresh + object: bulk_export/{MM}/2029_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.findings.14: + mode: full-refresh + object: bulk_export/{MM}/2030_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.findings.2: + mode: full-refresh + object: bulk_export/{MM}/2018_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.findings.3: + mode: full-refresh + object: bulk_export/{MM}/2019_findings.csv + sql: 
SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.findings.4: + mode: full-refresh + object: bulk_export/{MM}/2020_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.findings.5: + mode: full-refresh + object: bulk_export/{MM}/2021_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.findings.6: + mode: full-refresh + object: bulk_export/{MM}/2022_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.findings.7: + mode: full-refresh + object: bulk_export/{MM}/2023_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.findings.8: + mode: full-refresh + object: bulk_export/{MM}/2024_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.findings.9: + mode: full-refresh + object: bulk_export/{MM}/2025_findings.csv + sql: SELECT * FROM public_data_v1_0_0.findings WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.findings_text.0: + mode: full-refresh + object: bulk_export/{MM}/2016_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.findings_text.1: + mode: full-refresh + object: bulk_export/{MM}/2017_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.findings_text.10: + mode: full-refresh + object: bulk_export/{MM}/2026_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.findings_text.11: + mode: full-refresh + object: bulk_export/{MM}/2027_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.findings_text.12: + mode: full-refresh + object: bulk_export/{MM}/2028_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.findings_text.13: + mode: full-refresh + object: bulk_export/{MM}/2029_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.findings_text.14: + mode: full-refresh + object: bulk_export/{MM}/2030_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.findings_text.2: + mode: full-refresh + object: bulk_export/{MM}/2018_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.findings_text.3: + mode: full-refresh + object: bulk_export/{MM}/2019_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.findings_text.4: + mode: full-refresh + object: bulk_export/{MM}/2020_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2020' + target_options: + format: csv + 
public_data_v1_0_0.findings_text.5: + mode: full-refresh + object: bulk_export/{MM}/2021_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.findings_text.6: + mode: full-refresh + object: bulk_export/{MM}/2022_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.findings_text.7: + mode: full-refresh + object: bulk_export/{MM}/2023_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.findings_text.8: + mode: full-refresh + object: bulk_export/{MM}/2024_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.findings_text.9: + mode: full-refresh + object: bulk_export/{MM}/2025_findings_text.csv + sql: SELECT * FROM public_data_v1_0_0.findings_text WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.general.0: + mode: full-refresh + object: bulk_export/{MM}/2016_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.general.1: + mode: full-refresh + object: bulk_export/{MM}/2017_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.general.10: + mode: full-refresh + object: bulk_export/{MM}/2026_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.general.11: + mode: full-refresh + object: bulk_export/{MM}/2027_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.general.12: + mode: full-refresh + object: bulk_export/{MM}/2028_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.general.13: + mode: full-refresh + object: bulk_export/{MM}/2029_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.general.14: + mode: full-refresh + object: bulk_export/{MM}/2030_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.general.2: + mode: full-refresh + object: bulk_export/{MM}/2018_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.general.3: + mode: full-refresh + object: bulk_export/{MM}/2019_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.general.4: + mode: full-refresh + object: bulk_export/{MM}/2020_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.general.5: + mode: full-refresh + object: bulk_export/{MM}/2021_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.general.6: + mode: full-refresh + object: bulk_export/{MM}/2022_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2022' + target_options: + format: csv + 
public_data_v1_0_0.general.7: + mode: full-refresh + object: bulk_export/{MM}/2023_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.general.8: + mode: full-refresh + object: bulk_export/{MM}/2024_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.general.9: + mode: full-refresh + object: bulk_export/{MM}/2025_general.csv + sql: SELECT * FROM public_data_v1_0_0.general WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.0: + mode: full-refresh + object: bulk_export/{MM}/2016_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.1: + mode: full-refresh + object: bulk_export/{MM}/2017_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.10: + mode: full-refresh + object: bulk_export/{MM}/2026_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.11: + mode: full-refresh + object: bulk_export/{MM}/2027_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.12: + mode: full-refresh + object: bulk_export/{MM}/2028_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.13: + mode: full-refresh + object: bulk_export/{MM}/2029_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.14: + mode: full-refresh + object: bulk_export/{MM}/2030_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.2: + mode: full-refresh + object: bulk_export/{MM}/2018_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.3: + mode: full-refresh + object: bulk_export/{MM}/2019_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.4: + mode: full-refresh + object: bulk_export/{MM}/2020_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.5: + mode: full-refresh + object: bulk_export/{MM}/2021_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.6: + mode: full-refresh + object: bulk_export/{MM}/2022_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.7: + mode: full-refresh + object: bulk_export/{MM}/2023_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.8: + mode: full-refresh + object: 
bulk_export/{MM}/2024_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.notes_to_sefa.9: + mode: full-refresh + object: bulk_export/{MM}/2025_notes_to_sefa.csv + sql: SELECT * FROM public_data_v1_0_0.notes_to_sefa WHERE audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.passthrough.0: + mode: full-refresh + object: bulk_export/{MM}/2016_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.passthrough.1: + mode: full-refresh + object: bulk_export/{MM}/2017_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.passthrough.10: + mode: full-refresh + object: bulk_export/{MM}/2026_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.passthrough.11: + mode: full-refresh + object: bulk_export/{MM}/2027_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.passthrough.12: + mode: full-refresh + object: bulk_export/{MM}/2028_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.passthrough.13: + mode: full-refresh + object: bulk_export/{MM}/2029_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.passthrough.14: + mode: full-refresh + object: bulk_export/{MM}/2030_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.passthrough.2: + mode: full-refresh + object: bulk_export/{MM}/2018_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.passthrough.3: + mode: full-refresh + object: bulk_export/{MM}/2019_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.passthrough.4: + mode: full-refresh + object: bulk_export/{MM}/2020_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.passthrough.5: + mode: full-refresh + object: bulk_export/{MM}/2021_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.passthrough.6: + mode: full-refresh + object: bulk_export/{MM}/2022_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.passthrough.7: + mode: full-refresh + object: bulk_export/{MM}/2023_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.passthrough.8: + mode: full-refresh + object: bulk_export/{MM}/2024_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.passthrough.9: + mode: full-refresh + object: bulk_export/{MM}/2025_passthrough.csv + sql: SELECT * FROM public_data_v1_0_0.passthrough WHERE 
audit_year = '2025' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.0: + mode: full-refresh + object: bulk_export/{MM}/2016_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2016' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.1: + mode: full-refresh + object: bulk_export/{MM}/2017_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2017' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.10: + mode: full-refresh + object: bulk_export/{MM}/2026_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2026' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.11: + mode: full-refresh + object: bulk_export/{MM}/2027_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2027' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.12: + mode: full-refresh + object: bulk_export/{MM}/2028_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2028' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.13: + mode: full-refresh + object: bulk_export/{MM}/2029_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2029' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.14: + mode: full-refresh + object: bulk_export/{MM}/2030_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2030' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.2: + mode: full-refresh + object: bulk_export/{MM}/2018_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2018' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.3: + mode: full-refresh + object: bulk_export/{MM}/2019_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2019' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.4: + mode: full-refresh + object: bulk_export/{MM}/2020_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2020' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.5: + mode: full-refresh + object: bulk_export/{MM}/2021_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2021' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.6: + mode: full-refresh + object: bulk_export/{MM}/2022_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2022' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.7: + mode: full-refresh + object: bulk_export/{MM}/2023_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2023' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.8: + mode: full-refresh + object: bulk_export/{MM}/2024_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE audit_year = '2024' + target_options: + format: csv + public_data_v1_0_0.secondary_auditors.9: + mode: full-refresh + object: bulk_export/{MM}/2025_secondary_auditors.csv + sql: SELECT * FROM public_data_v1_0_0.secondary_auditors WHERE 
audit_year = '2025' + target_options: + format: csv +target: BULK_DATA_EXPORT + diff --git a/backend/dissemination/sql/sling/public_data_v1_0_0/public_data_v1_0_0.yaml b/backend/dissemination/sql/sling/public_data_v1_0_0/public_data_v1_0_0.yaml new file mode 100644 index 0000000000..d8741960c0 --- /dev/null +++ b/backend/dissemination/sql/sling/public_data_v1_0_0/public_data_v1_0_0.yaml @@ -0,0 +1,550 @@ +# This uses an env var called PG to +# set the connection string. +source: FAC_SNAPSHOT_URI +target: FAC_SNAPSHOT_URI +env: + SLING_ALLOW_EMPTY: true + +# Handy +# SELECT column_name +# FROM information_schema.columns +# WHERE table_schema = 'public' +# AND table_name = 'dissemination_general' +# AND column_name NOT IN ('id'); + +streams: + # The first table we create is the general table. All of the + # general table is public data, so we copy it wholesale. + public.dissemination_general: + mode: full-refresh + object: public_data_v1_0_0.general + sql: | + -- All of the general table is public data. + SELECT + gen.id as id, + NEXTVAL('public_data_v1_0_0.general') AS seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.agencies_with_prior_findings, + gen.audit_period_covered, + gen.audit_type, + gen.auditee_address_line_1, + gen.auditee_certified_date, + gen.auditee_certify_name, + gen.auditee_certify_title, + gen.auditee_city, + gen.auditee_contact_name, + gen.auditee_contact_title, + gen.auditee_ein, + gen.auditee_email, + gen.auditee_name, + gen.auditee_phone, + gen.auditee_state, + gen.auditee_zip, + gen.auditor_address_line_1, + gen.auditor_certified_date, + gen.auditor_certify_name, + gen.auditor_certify_title, + gen.auditor_city, + gen.auditor_contact_name, + gen.auditor_contact_title, + gen.auditor_country, + gen.auditor_ein, + gen.auditor_email, + gen.auditor_firm_name, + gen.auditor_foreign_address, + gen.auditor_phone, + gen.auditor_state, + gen.auditor_zip, + gen.cognizant_agency, + gen.data_source, + gen.date_created, + gen.dollar_threshold, + gen.entity_type, + gen.fac_accepted_date, + gen.fy_end_date, + gen.fy_start_date, + gen.gaap_results, + gen.is_additional_ueis, + gen.is_aicpa_audit_guide_included, + gen.is_going_concern_included, + gen.is_internal_control_deficiency_disclosed, + gen.is_internal_control_material_weakness_disclosed, + gen.is_low_risk_auditee, + gen.is_material_noncompliance_disclosed, + CASE EXISTS + ( + SELECT + ein.report_id + FROM + dissemination_additionalein ein + WHERE + ein.report_id = gen.report_id + ) + WHEN FALSE THEN 'No' + ELSE 'Yes' + END AS is_multiple_eins, + gen.is_public, + CASE EXISTS + ( + SELECT + aud.report_id + FROM + dissemination_secondaryauditor aud + WHERE + aud.report_id = gen.report_id + ) + WHEN FALSE THEN 'No' + ELSE 'Yes' + END AS is_secondary_auditors, + gen.is_sp_framework_required, + gen.number_months, + gen.oversight_agency, + gen.ready_for_certification_date, + gen.sp_framework_basis, + gen.sp_framework_opinions, + gen.submitted_date, + gen.total_amount_expended, + gen.type_audit_code + FROM + public.dissemination_general gen + ORDER BY gen.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.general + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.general SET batch_number=DIV(seq, 20000); + + # All additional EINs are public. 
+ public.additional_eins: + mode: full-refresh + object: public_data_v1_0_0.additional_eins + sql: | + SELECT + ein.id as id, + div(NEXTVAL('public_data_v1_0_0.additional_eins'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + ein.additional_ein + FROM + public_data_v1_0_0.general gen, + public.dissemination_additionalein ein + WHERE + gen.report_id = ein.report_id + ORDER BY ein.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.additional_eins + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.additional_eins SET batch_number=DIV(seq, 20000); + + # All of the additional UEI info is public info. + public.dissemination_additional_ueis: + mode: full-refresh + object: public_data_v1_0_0.additional_ueis + sql: | + SELECT + uei.id as id, + div(NEXTVAL('public_data_v1_0_0.additional_ueis'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + uei.additional_uei + FROM + public_data_v1_0_0.general gen, + public.dissemination_additionaluei uei + WHERE + gen.report_id = uei.report_id + ORDER BY uei.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.additional_ueis + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.additional_ueis SET batch_number=DIV(seq, 20000); + + # Corrective action plans are NOT always public. + public.dissemination_corrective_action_plan: + mode: full-refresh + object: public_data_v1_0_0.corrective_action_plans + sql: | + SELECT + cap.id as id, + div(NEXTVAL('public_data_v1_0_0.corrective_action_plans'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + cap.contains_chart_or_table, + cap.finding_ref_number, + cap.planned_action + FROM + public_data_v1_0_0.general gen, + public.dissemination_captext cap + WHERE + cap.report_id = gen.report_id + AND + -- Only include the public corrective action plans. + gen.is_public = true + ORDER BY cap.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.corrective_action_plans + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.corrective_action_plans SET batch_number=DIV(seq, 20000); + + # All Federal award data is public. + public.dissemination_federalaward: + mode: full-refresh + object: public_data_v1_0_0.federal_awards + sql: | + SELECT + award.id as id, + div(NEXTVAL('public_data_v1_0_0.federal_awards'), 20000) as seq, + dg.report_id, + dg.auditee_uei, + dg.audit_year, + dg.fac_accepted_date, + concat(award.federal_agency_prefix,'.',award.federal_award_extension) as aln, + --- + award.additional_award_identification, + award.amount_expended, + award.audit_report_type, + award.award_reference, + award.cluster_name, + award.cluster_total, + award.federal_agency_prefix, + award.federal_award_extension, + award.federal_program_name, + award.federal_program_total, + award.findings_count, + award.is_direct, + award.is_loan, + award.is_major, + award.is_passthrough_award, + award.loan_balance, + award.other_cluster_name, + award.passthrough_amount, + award.state_cluster_name + FROM + public_data_v1_0_0.general dg, + public.dissemination_federalaward award + WHERE + award.report_id = dg.report_id + ORDER BY award.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created.
+ ALTER TABLE public_data_v1_0_0.federal_awards + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.federal_awards SET batch_number=DIV(seq, 20000); + + # The findings table is public. + public.dissemination_findings: + mode: full-refresh + object: public_data_v1_0_0.findings + sql: | + SELECT + finding.id as id, + div(NEXTVAL('public_data_v1_0_0.findings'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + finding.award_reference, + finding.is_material_weakness, + finding.is_modified_opinion, + finding.is_other_findings, + finding.is_other_matters, + finding.is_questioned_costs, + finding.is_repeat_finding, + finding.is_significant_deficiency, + finding.prior_finding_ref_numbers, + finding.reference_number, + finding.type_requirement + FROM + public_data_v1_0_0.general gen, + public.dissemination_finding finding + WHERE + finding.report_id = gen.report_id + ORDER BY finding.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.findings + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.findings SET batch_number=DIV(seq, 20000); + + # Findings text is NOT always public. + public.dissemination_findingstext: + mode: full-refresh + object: public_data_v1_0_0.findings_text + sql: | + SELECT + ft.id as id, + div(NEXTVAL('public_data_v1_0_0.findings_text'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + ft.finding_ref_number, + ft.contains_chart_or_table, + ft.finding_text + FROM + public_data_v1_0_0.general gen, + public.dissemination_findingtext ft + WHERE + ft.report_id = gen.report_id + AND + gen.is_public = true + ORDER BY ft.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.findings_text + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.findings_text SET batch_number=DIV(seq, 20000); + + # The notes to SEFA are NOT all public. + public.dissemination_notes: + mode: full-refresh + object: public_data_v1_0_0.notes_to_sefa + sql: | + SELECT + note.id as id, + div(NEXTVAL('public_data_v1_0_0.notes_to_sefa'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + note.accounting_policies, + note.contains_chart_or_table, + note.content, + note.is_minimis_rate_used, + note.note_title as title, + note.rate_explained + FROM + public_data_v1_0_0.general gen, + public.dissemination_note note + WHERE + note.report_id = gen.report_id + AND + -- Some notes are not public. + gen.is_public = true + ORDER BY note.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.notes_to_sefa + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.notes_to_sefa SET batch_number=DIV(seq, 20000); + + # All passthrough information is public. + public.dissemination_passthrough: + mode: full-refresh + object: public_data_v1_0_0.passthrough + sql: | + SELECT + pass.id as id, + div(NEXTVAL('public_data_v1_0_0.passthrough'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + pass.award_reference, + pass.passthrough_id, + pass.passthrough_name + FROM + public_data_v1_0_0.general gen, + public.dissemination_passthrough pass + WHERE + gen.report_id = pass.report_id + ORDER BY pass.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. 
+ ALTER TABLE public_data_v1_0_0.passthrough + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.passthrough SET batch_number=DIV(seq, 20000); + + # All secondary auditor info is public. + public.secondary_auditors: + mode: full-refresh + object: public_data_v1_0_0.secondary_auditors + sql: | + SELECT + sa.id as id, + div(NEXTVAL('public_data_v1_0_0.secondary_auditors'), 20000) as seq, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + sa.address_city, + sa.address_state, + sa.address_street, + sa.address_zipcode, + sa.auditor_ein, + sa.auditor_name, + sa.contact_email, + sa.contact_name, + sa.contact_phone, + sa.contact_title + FROM + public_data_v1_0_0.general gen, + public.dissemination_secondaryauditor sa + WHERE + sa.report_id = gen.report_id + ORDER BY sa.id + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.secondary_auditors + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.secondary_auditors SET batch_number=DIV(seq, 20000); + + public.combined: + mode: full-refresh + object: public_data_v1_0_0.combined + sql: | + SELECT + dg.report_id, + div(NEXTVAL('public_data_v1_0_0.combined'), 20000) as seq, + dfa.award_reference, + df.reference_number, + concat(dfa.federal_agency_prefix,'.',dfa.federal_award_extension) as aln, + -- + -- general + -- + dg.id as general_row_id, + dg.auditee_uei, + dg.audit_year, + dg.agencies_with_prior_findings, + dg.audit_period_covered, + dg.audit_type, + dg.auditee_address_line_1, + dg.auditee_certified_date, + dg.auditee_certify_name, + dg.auditee_certify_title, + dg.auditee_city, + dg.auditee_contact_name, + dg.auditee_contact_title, + dg.auditee_ein, + dg.auditee_email, + dg.auditee_name, + dg.auditee_phone, + dg.auditee_state, + dg.auditee_zip, + dg.auditor_address_line_1, + dg.auditor_certified_date, + dg.auditor_certify_name, + dg.auditor_certify_title, + dg.auditor_city, + dg.auditor_contact_name, + dg.auditor_contact_title, + dg.auditor_country, + dg.auditor_ein, + dg.auditor_email, + dg.auditor_firm_name, + dg.auditor_foreign_address, + dg.auditor_phone, + dg.auditor_state, + dg.auditor_zip, + dg.cognizant_agency, + dg.data_source, + dg.date_created, + dg.dollar_threshold, + dg.entity_type, + dg.fac_accepted_date, + dg.fy_end_date, + dg.fy_start_date, + dg.gaap_results, + dg.is_additional_ueis, + dg.is_aicpa_audit_guide_included, + dg.is_going_concern_included, + dg.is_internal_control_deficiency_disclosed, + dg.is_internal_control_material_weakness_disclosed, + dg.is_low_risk_auditee, + dg.is_material_noncompliance_disclosed, + dg.is_multiple_eins, + dg.is_public, + dg.is_secondary_auditors, + dg.is_sp_framework_required, + dg.number_months, + dg.oversight_agency, + dg.ready_for_certification_date, + dg.sp_framework_basis, + dg.sp_framework_opinions, + dg.submitted_date, + dg.total_amount_expended, + dg.type_audit_code, + -- + -- federal_award + -- + dfa.id as federal_award_row_id, + dfa.additional_award_identification, + dfa.amount_expended, + dfa.audit_report_type, + dfa.cluster_name, + dfa.cluster_total, + dfa.federal_agency_prefix, + dfa.federal_award_extension, + dfa.federal_program_name, + dfa.federal_program_total, + dfa.findings_count, + dfa.is_direct, + dfa.is_loan, + dfa.is_major, + dfa.is_passthrough_award, + dfa.loan_balance, + dfa.other_cluster_name, + dfa.passthrough_amount, + dfa.state_cluster_name, + -- + -- finding + -- + df.id as finding_row_id, + df.is_material_weakness, + df.is_modified_opinion, + 
df.is_other_findings, + df.is_other_matters, + df.is_questioned_costs, + df.is_repeat_finding, + df.is_significant_deficiency, + df.prior_finding_ref_numbers, + df.type_requirement, + -- + -- passthrough + -- + dp.id as passthrough_row_id, + dp.passthrough_id, + dp.passthrough_name + FROM + public.dissemination_federalaward dfa + LEFT JOIN public_data_v1_0_0.general dg + ON dfa.report_id = dg.report_id + LEFT JOIN public.dissemination_finding df + ON dfa.report_id = df.report_id + AND dfa.award_reference = df.award_reference + LEFT JOIN public.dissemination_passthrough dp + ON dfa.report_id = dp.report_id + AND dfa.award_reference = dp.award_reference + target_options: + post_sql: | + -- Add a clean batch number after the table is created. + ALTER TABLE public_data_v1_0_0.combined + ADD COLUMN batch_number INTEGER; + UPDATE public_data_v1_0_0.combined SET batch_number=DIV(seq, 20000); diff --git a/backend/dissemination/sql/sling/public_data_v1_0_0/public_metadata_v1_0_0.yaml b/backend/dissemination/sql/sling/public_data_v1_0_0/public_metadata_v1_0_0.yaml new file mode 100644 index 0000000000..eb6735387c --- /dev/null +++ b/backend/dissemination/sql/sling/public_data_v1_0_0/public_metadata_v1_0_0.yaml @@ -0,0 +1,73 @@ +# This uses an env var called PG to +# set the connection string. +source: FAC_SNAPSHOT_URI +target: FAC_SNAPSHOT_URI +env: + SLING_ALLOW_EMPTY: true + +# Handy +# SELECT column_name +# FROM information_schema.columns +# WHERE table_schema = 'public' +# AND table_name = 'dissemination_general' +# AND column_name NOT IN ('id'); + +streams: + # This looks at the tables after they have been updated, and + # generates metadata that agencies use to determine if they + # have completely pulled all of the data in their systems. + public_data_v1_0_0.metadata: + mode: full-refresh + object: public_data_v1_0_0.metadata + sql: | + SELECT 'additional_eins' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('additional_eins') + FROM public_data_v1_0_0.additional_eins + UNION + SELECT 'additional_ueis' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('additional_ueis') + FROM public_data_v1_0_0.additional_ueis + UNION + SELECT 'combined' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('combined') + FROM public_data_v1_0_0.combined + UNION + SELECT 'federal_awards' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('federal_awards') + FROM public_data_v1_0_0.federal_awards + UNION + SELECT 'findings_text' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('findings_text') + FROM public_data_v1_0_0.findings_text + UNION + SELECT 'findings' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('findings') + FROM public_data_v1_0_0.findings + UNION + SELECT 'general' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('general') + FROM public_data_v1_0_0.general + UNION + SELECT 'notes_to_sefa' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('notes_to_sefa') + FROM public_data_v1_0_0.notes_to_sefa + UNION + SELECT 'passthrough' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('passthrough') + FROM public_data_v1_0_0.passthrough + UNION + SELECT 'secondary_auditors' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('secondary_auditors') + FROM public_data_v1_0_0.secondary_auditors + UNION + SELECT 'tribal_corrective_action_plans' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('tribal_corrective_action_plans') + FROM public_data_v1_0_0.tribal_corrective_action_plans + UNION + SELECT 'tribal_findings_text' + AS table, COUNT(*) --, 
api_v2_0_0_functions.batches('tribal_findings_text') + FROM public_data_v1_0_0.tribal_findings_text + UNION + SELECT 'tribal_notes_to_sefa' + AS table, COUNT(*) --, api_v2_0_0_functions.batches('tribal_notes_to_sefa') + FROM public_data_v1_0_0.tribal_notes_to_sefa diff --git a/backend/dissemination/sql/sling/public_data_v1_0_0/tribal_data_v1_0_0.yaml b/backend/dissemination/sql/sling/public_data_v1_0_0/tribal_data_v1_0_0.yaml new file mode 100644 index 0000000000..071294fa23 --- /dev/null +++ b/backend/dissemination/sql/sling/public_data_v1_0_0/tribal_data_v1_0_0.yaml @@ -0,0 +1,90 @@ +# This uses an env var called PG to +# set the connection string. +source: FAC_SNAPSHOT_URI +target: FAC_SNAPSHOT_URI +env: + SLING_ALLOW_EMPTY: true + +# Handy +# SELECT column_name +# FROM information_schema.columns +# WHERE table_schema = 'public' +# AND table_name = 'dissemination_general' +# AND column_name NOT IN ('id'); + +streams: + # Corrective action plans are NOT always public. + public.dissemination_corrective_action_plan: + mode: full-refresh + object: public_data_v1_0_0.tribal_corrective_action_plans + sql: | + SELECT + cap.id as id, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + cap.contains_chart_or_table, + cap.finding_ref_number, + cap.planned_action + FROM + public_data_v1_0_0.general gen, + public.dissemination_captext cap + WHERE + cap.report_id = gen.report_id + AND + -- Only include the public corrective action plans. + gen.is_public = false + ORDER BY cap.id + + # Findings text is NOT always public. + public.dissemination_findingstext: + mode: full-refresh + object: public_data_v1_0_0.tribal_findings_text + sql: | + SELECT + ft.id as id, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + ft.finding_ref_number, + ft.contains_chart_or_table, + ft.finding_text + FROM + public_data_v1_0_0.general gen, + public.dissemination_findingtext ft + WHERE + ft.report_id = gen.report_id + AND + gen.is_public = false + ORDER BY ft.id + + # The notes to SEFA are NOT all public. + public.dissemination_notes: + mode: full-refresh + object: public_data_v1_0_0.tribal_notes_to_sefa + sql: | + SELECT + note.id as id, + gen.report_id, + gen.auditee_uei, + gen.audit_year, + gen.fac_accepted_date, + --- + note.accounting_policies, + note.contains_chart_or_table, + note.content, + note.is_minimis_rate_used, + note.note_title as title, + note.rate_explained + FROM + public_data_v1_0_0.general gen, + public.dissemination_note note + WHERE + note.report_id = gen.report_id + AND + -- Some notes are not public. 
+ gen.is_public = false + ORDER BY note.id diff --git a/backend/dissemination/sql/test_api.py b/backend/dissemination/sql/test_api.py new file mode 100644 index 0000000000..69e246659e --- /dev/null +++ b/backend/dissemination/sql/test_api.py @@ -0,0 +1,178 @@ +from django.test import TestCase +import os +import requests +import sys +import uuid + + +class EnvVars: + FAC_API_KEY = os.getenv("FAC_API_KEY") + FAC_API_KEY_ID = os.getenv("CYPRESS_API_GOV_USER_ID") + FAC_AUTH_BEARER = os.getenv("CYPRESS_API_GOV_JWT") + RECORDS_REQUESTED = 5 + CAN_READ_SUPPRESSED = ( + str(os.getenv("CAN_READ_SUPPRESSED")) + if os.getenv("CAN_READ_SUPPRESSED") is not None + else "0" + ) + + +def url(env): + EnvVars.FAC_API_KEY = os.getenv("FAC_API_KEY") + match env: + case "local": + return "http://localhost:3000" + case "preview": + return "https://api-preview.fac.gov" + case "dev": + return "https://api-dev.fac.gov" + case "staging": + return "https://api-staging.fac.gov" + case "production": + return "https://api.fac.gov" + case _: + print("No environment provided; exiting.") + sys.exit() + + +def headers(env): + if env in ["local"]: + if EnvVars.FAC_AUTH_BEARER is None: + print("FAC_AUTH_BEARER not set.") + sys.exit() + if EnvVars.FAC_API_KEY_ID is None: + print("FAC_API_KEY_ID not set.") + sys.exit() + return { + "authorization": f"bearer {EnvVars.FAC_AUTH_BEARER}", + "x-api-user-id": EnvVars.FAC_API_KEY_ID, + } + elif env in ["preview", "dev", "staging", "production"]: + if EnvVars.FAC_API_KEY is None: + print("FAC_API_KEY not set.") + sys.exit() + return { + "x-api-key": EnvVars.FAC_API_KEY, + } + else: + print("No environment matches for header construction.") + sys.exit() + + +def api(version): + return {"accept-profile": version} + + +def limit(start, end): + return {"Range-Unit": "items", "Range": f"{start}-{end}"} + + +class ApiTests(TestCase): + + ENV = "local" + + def good_resp(self, objs, keys): + """ + Asserts that an API response is: + * A list + * A list composed of objects that all contain the required keys + """ + self.assertIsInstance(objs, list) + self.assertEqual(len(objs), EnvVars.RECORDS_REQUESTED) + for k in keys: + for o in objs: + self.assertIn(k, o) + return True + + def cons(self, env, api_version): + """Constructs the base URL for making multiple API calls off of.""" + + # FIXME: currently, both tests that use this method fail over a "ConnectionRefusedError". 
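+ # (Hedged guess, not a confirmed diagnosis: with ENV = "local" these
+ # requests target http://localhost:3000, i.e. the PostgREST container from
+ # docker-compose; run outside that stack, or before it is up, the socket is
+ # closed and requests raises ConnectionRefusedError.)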
+ def _helper(endpoint, keys): + base = url(env) + h = ( + headers(env) + | limit(0, EnvVars.RECORDS_REQUESTED - 1) + | api(api_version) + ) + r = requests.get(base + f"/{endpoint}", headers=h, timeout=60) + self.good_resp(r.json(), keys) + + return _helper + + def common_tables(self, f): + """These tables are common to both the old API and the new public data API.""" + + f("general", ["report_id", "audit_year", "auditee_name"]) + f( + "federal_awards", + ["report_id", "amount_expended", "audit_report_type"], + ) + f("corrective_action_plans", ["report_id", "finding_ref_number", "auditee_uei"]) + + def test_api_v1_0_3_not_exist(self): + f = self.cons(self.ENV, "api_v1_0_3") + try: + self.common_tables(f) + print("This schema/API should not exist.") + self.assertTrue(False) + except Exception: + self.assertTrue(True) + + def test_api_v1_1_0(self): + f = self.cons(self.ENV, "api_v1_1_0") + self.common_tables(f) + + def test_api_v2_0_0(self): + f = self.cons(self.ENV, "api_v2_0_0") + self.common_tables(f) + + def test_suppressed_not_accessible_with_bad_key(self): + # Stash the token, and wipe it out, so the API + # calls will fail. + TEMP_FAC_API_KEY_ID = EnvVars.FAC_API_KEY_ID + EnvVars.FAC_API_KEY_ID = str(uuid.uuid4()) + f = self.cons(self.ENV, "api_v2_0_0") + failed_count = 0 + for thunk in [ + lambda: f( + "suppressed_notes_to_sefa", + ["report_id", "content", "is_minimis_rate_used"], + ), + lambda: f("suppressed_findings_text", ["report_id", "finding_ref_number"]), + lambda: f( + "suppressed_corrective_action_plans", + ["report_id", "finding_ref_number", "planned_action"], + ), + ]: + try: + thunk() + except Exception: + if EnvVars.CAN_READ_SUPPRESSED == "0": + failed_count += 1 + self.assertEqual(failed_count, 3) + # Restore it in case we need it in later tests. + EnvVars.FAC_API_KEY_ID = TEMP_FAC_API_KEY_ID + + def test_suppressed_accessible_with_good_key(self): + # Stash the token, and wipe it out, so the API + # calls will fail. 
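+ # (Unlike the previous test, nothing is actually stashed or wiped here: the
+ # real key id is kept, and the suppressed_* endpoints are expected to
+ # succeed when CAN_READ_SUPPRESSED == "1".)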
+ f = self.cons(self.ENV, "api_v2_0_0") + failed_count = 0 + for thunk in [ + lambda: f( + "suppressed_notes_to_sefa", + ["report_id", "content", "is_minimis_rate_used"], + ), + lambda: f("suppressed_findings_text", ["report_id", "finding_ref_number"]), + lambda: f( + "suppressed_corrective_action_plans", + ["report_id", "finding_ref_number", "planned_action"], + ), + ]: + try: + thunk() + except Exception: + if EnvVars.CAN_READ_SUPPRESSED == "1": + failed_count += 1 + self.assertEqual(failed_count, 0) diff --git a/backend/dissemination/test_views.py b/backend/dissemination/test_views.py index f2f3190bb1..1648cae2ea 100644 --- a/backend/dissemination/test_views.py +++ b/backend/dissemination/test_views.py @@ -671,7 +671,7 @@ def _mock_filename(self): return "some-report-name.xlsx", None def _mock_download_url(self): - return "http://example.com/gsa-fac-private-s3/temp/some-report-name.xlsx" + return "http://example.com/fac-private-s3/temp/some-report-name.xlsx" @patch("dissemination.summary_reports.prepare_workbook_for_download") def test_bad_search_returns_400(self, mock_prepare_workbook_for_download): diff --git a/backend/docker-compose-db-only.yml b/backend/docker-compose-db-only.yml new file mode 100644 index 0000000000..14e962c4a8 --- /dev/null +++ b/backend/docker-compose-db-only.yml @@ -0,0 +1,71 @@ +services: + #--------------------------------------------- + # Postgres DB + #--------------------------------------------- + db: + image: "postgres:15" + environment: + POSTGRES_HOST_AUTH_METHOD: trust + volumes: + - postgres-data:/var/lib/postgresql/data/ + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -d postgres -U postgres"] + interval: 10s + timeout: 5s + retries: 10 + db2: + image: "postgres:15" + environment: + POSTGRES_HOST_AUTH_METHOD: "trust" + volumes: + - postgres-data2:/var/lib/postgresql/data/ + ports: + - "5431:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -d postgres -U postgres"] + interval: 10s + timeout: 5s + retries: 10 + + #--------------------------------------------- + # Minio (S3 clone) + #--------------------------------------------- + minio: + container_name: "minio" + image: minio/minio + command: server /tmp/minio --console-address ":9002" + ports: + - "9001:9000" + - "9002:9002" + volumes: + - "minio-vol:/tmp/minio" + + #--------------------------------------------- + # PostgREST API provider + #--------------------------------------------- + api: + image: ghcr.io/gsa-tts/fac/postgrest:latest + ports: + - "3000:3000" + expose: + - "3000" + environment: + # This now requires us to `sling` data to db2 for local API testing. 
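+ # (How db2 gets populated locally, per backend/run.sh: the startup script
+ # copies the dissemination tables from db into db2 with cgov-util and then
+ # runs the fac-snapshot-db pre/post SQL, which is where the API schemas
+ # PostgREST serves are built.)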
+ PGRST_DB_URI: postgres://postgres@db2:5432/postgres + # PGRST2_DB_URI: postgres://postgres@db:5431/postgres + PGRST_OPENAPI_SERVER_PROXY_URI: http://127.0.0.1:3000 + PGRST_DB_ANON_ROLE: anon + # See https://postgrest.org/en/stable/references/api/schemas.html#multiple-schemas for multiple schemas + PGRST_DB_SCHEMAS: "api_v1_0_3,api_v1_1_0,admin_api_v1_1_0,api_v2_0_0" + PGRST_JWT_SECRET: ${PGRST_JWT_SECRET:-32_chars_fallback_secret_testing} # Fallback value for testing environments + # Enable this to inspect the DB plans for queries via EXPLAIN + PGRST_DB_PLAN_ENABLED: 1 + depends_on: + db2: + condition: service_healthy +volumes: + postgres-data: + postgres-data2: + minio-vol: diff --git a/backend/docker-compose-web.yml b/backend/docker-compose-web.yml index 98deabef2e..eb0502e7c2 100644 --- a/backend/docker-compose-web.yml +++ b/backend/docker-compose-web.yml @@ -28,14 +28,6 @@ services: interval: 10s timeout: 5s retries: 10 - - historic-data: - image: ghcr.io/gsa-tts/fac-historic-public-csvs/load-historic-public-data:20230912 - depends_on: - db: - condition: service_healthy - environment: - DATABASE_URL: postgres://postgres@db/postgres web: image: ghcr.io/gsa-tts/fac/web-container:latest @@ -94,12 +86,11 @@ services: expose: - "3000" environment: - PGRST_DB_URI: postgres://postgres@db:5432/postgres - PGRST2_DB_URI: postgres://postgres@db:5431/postgres + PGRST_DB_URI: postgres://postgres@db2:5432/postgres PGRST_OPENAPI_SERVER_PROXY_URI: http://127.0.0.1:3000 PGRST_DB_ANON_ROLE: anon # See https://postgrest.org/en/stable/references/api/schemas.html#multiple-schemas for multiple schemas - PGRST_DB_SCHEMAS: "api_v1_0_3, api_v1_1_0, admin_api_v1_1_0" + PGRST_DB_SCHEMAS: "api_v1_1_0,api_v2_0_0" PGRST_JWT_SECRET: ${PGRST_JWT_SECRET:-32_chars_fallback_secret_testing} # Fallback value for testing environments depends_on: db: diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index f8ee53cada..e876017448 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,11 +1,10 @@ -version: "3.7" - services: #--------------------------------------------- # Postgres DB #--------------------------------------------- db: image: "postgres:15" + shm_size: 1g environment: POSTGRES_HOST_AUTH_METHOD: trust volumes: @@ -19,6 +18,7 @@ services: retries: 10 db2: image: "postgres:15" + shm_size: 1g environment: POSTGRES_HOST_AUTH_METHOD: "trust" volumes: @@ -31,17 +31,6 @@ services: timeout: 5s retries: 10 - #--------------------------------------------- - # Historic data - #--------------------------------------------- - historic-data: - image: ghcr.io/gsa-tts/fac-historic-public-csvs/load-historic-public-data:20230912 - depends_on: - db: - condition: service_healthy - environment: - DATABASE_URL: postgres://postgres@db/postgres - #--------------------------------------------- # Django app #--------------------------------------------- @@ -62,7 +51,9 @@ services: api: condition: service_started environment: - DATABASE_URL: postgres://postgres@db/postgres + # Should these be mode disable locally? 
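+ # (For these local containers, which use POSTGRES_HOST_AUTH_METHOD=trust and
+ # do not enable TLS, sslmode=disable should be safe; the cloud.gov URIs come
+ # from VCAP_SERVICES via tools/setup_cgov_env.sh and are unaffected.)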
+ DATABASE_URL: postgres://postgres@db/postgres?sslmode=disable + SNAPSHOT_URL: postgres://postgres@db2/postgres?sslmode=disable POSTGREST_URL: http://api:3000 DJANGO_DEBUG: true SAM_API_KEY: ${SAM_API_KEY} @@ -76,7 +67,6 @@ services: DISABLE_AUTH: ${DISABLE_AUTH:-False} PGRST_JWT_SECRET: ${PGRST_JWT_SECRET:-32_chars_fallback_secret_testing} ENABLE_DEBUG_TOOLBAR: false - env_file: - ".env" ports: @@ -101,15 +91,35 @@ services: #--------------------------------------------- # Minio (S3 clone) #--------------------------------------------- + # minio: + # container_name: "minio" + # image: minio/minio + # command: server /tmp/minio --console-address ":9002" + # ports: + # - "9001:9000" + # - "9002:9002" + # volumes: + # - "minio-vol:/tmp/minio" minio: - container_name: "minio" - image: minio/minio - command: server /tmp/minio --console-address ":9002" + image: minio/minio:latest + command: server /data --console-address ":9002" + expose: + - "9001" + - "9002" ports: - - "9001:9000" - - "9002:9002" + - 9001:9000 + - 9002:9002 + environment: + MINIO_ROOT_USER: singleauditclearinghouse + MINIO_ROOT_PASSWORD: singleauditclearinghouse + MINIO_API_ROOT_ACCESS: on + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 5s + timeout: 5s + retries: 5 volumes: - - "minio-vol:/tmp/minio" + - minio-vol:/data #--------------------------------------------- # PostgREST API provider @@ -121,17 +131,16 @@ services: expose: - "3000" environment: - PGRST_DB_URI: postgres://postgres@db:5432/postgres - PGRST2_DB_URI: postgres://postgres@db:5431/postgres + PGRST_DB_URI: postgres://postgres@db2:5432/postgres PGRST_OPENAPI_SERVER_PROXY_URI: http://127.0.0.1:3000 PGRST_DB_ANON_ROLE: anon # See https://postgrest.org/en/stable/references/api/schemas.html#multiple-schemas for multiple schemas - PGRST_DB_SCHEMAS: "api_v1_0_3, api_v1_1_0, admin_api_v1_1_0" + PGRST_DB_SCHEMAS: "api_v1_1_0,api_v2_0_0" PGRST_JWT_SECRET: ${PGRST_JWT_SECRET:-32_chars_fallback_secret_testing} # Fallback value for testing environments # Enable this to inspect the DB plans for queries via EXPLAIN - PGRST_DB_PLAN_ENABLED: ${PGRST_DB_PLAN_ENABLED:-false} + PGRST_DB_PLAN_ENABLED: 1 depends_on: - db: + db2: condition: service_healthy volumes: postgres-data: diff --git a/backend/fac-backup-util.sh b/backend/fac-backup-util.sh index 5804193ecb..11f0452b6e 100755 --- a/backend/fac-backup-util.sh +++ b/backend/fac-backup-util.sh @@ -55,6 +55,12 @@ elif [ "$run_option" == "deploy_backup" ]; then gonogo "db_to_db" AWSS3Sync "$s3_name" "$backup_s3_name" gonogo "s3_sync" +elif [ "$run_option" == "rds_backup" ]; then + GetUtil + InstallAWS + gonogo "install_aws" + RDSToRDS "$db_name" "$backup_db_name" "backup" + gonogo "db_to_db" elif [ "$run_option" == "scheduled_backup" ]; then GetUtil InstallAWS diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1ac8180ed8..267a9480bd 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -35,11 +35,14 @@ disable = [ [tool.bandit] # TDB 2022-12-07 - test_schemas and test_validators use random number generation, which bandit doesn't like +# 2024-11-14 - create_stream_yaml and create_partition_statements will never be touched by users, yet bandit doesn't like the SQL injection risk. 
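+# (Per the note above, these generators interpolate identifiers defined in the
+# repo rather than request input, so excluding the files here is simpler than
+# sprinkling inline "# nosec" markers through them.)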
exclude_dirs = [ "audit/cross_validation/test_check_has_federal_awards.py", "audit/test_schemas.py", "audit/test_validators.py", "node_modules", + "dissemination/sql/fac-snapshot-db/create_partition_statements.py", + "dissemination/sql/sling/bulk_data_export/create_stream_yaml.py", "dissemination/test_search.py", "dissemination/tests.py", ".venv" diff --git a/backend/run.sh b/backend/run.sh index cfd0fad650..0407815c0a 100755 --- a/backend/run.sh +++ b/backend/run.sh @@ -1,19 +1,21 @@ #!/bin/bash # Source everything; everything is now a function. -# Remember: bash has no idea if a function exists, -# so a typo in a function name will fail silently. Similarly, +# Remember: bash has no idea if a function exists. Similarly, # bash has horrible scoping, so use of `local` in functions is # critical for cleanliness in the startup script. source tools/util_startup.sh # This will choose the correct environment # for local envs (LOCAL or TESTING) and cloud.gov source tools/setup_env.sh -source tools/curation_audit_tracking_init.sh -source tools/api_teardown.sh +source tools/cgov_util_local_only.sh +source tools/curation_audit_tracking_disable.sh +source tools/sling_bulk_export.sh source tools/migrate_app_tables.sh -source tools/api_standup.sh source tools/seed_cog_baseline.sh +source tools/setup_env.sh +source tools/sql_pre_post.sh +source tools/util_startup.sh ##### # SETUP THE LOCAL ENVIRONMENT @@ -21,11 +23,28 @@ setup_env gonogo "setup_env" ##### -# API TEARDOWN -# API has to be deprecated/removed before migration, because -# of tight coupling between schema/views and the dissemination tables -api_teardown -gonogo "api_teardown" +# SIMULATE DEPLOY BACKUP +# Before we deploy, we always get a copy of dissemination_* +# tables into fac-snapshot-db. We need to simulate this locally +# so that we can run SQL pre/post operations on fac-snapshot-db. +cgov_util_local_only +gonogo "cgov_util_local_only" + +##### +# SIMULATE NIGHTLY DATA/API CREATION +sql_pre_fac_snapshot_db +gonogo "sql_pre_fac_snapshot_db" +sql_post_fac_snapshot_db +gonogo "sql_post_fac_snapshot_db" + +##### +# SQL PRE +# We have SQL that we want to run before the migrations and sling are run. +# This tears down things that would conflict with migrations, etc. +sql_pre_fac_db +gonogo "sql_pre" +curation_audit_tracking_disable +gonogo "curation_audit_tracking_disable" ##### # MIGRATE APP TABLES @@ -33,15 +52,11 @@ migrate_app_tables gonogo "migrate_app_tables" ##### -# API STANDUP -# Standup the API, which may depend on migration changes -api_standup -gonogo "api_standup" - -##### -# CURATION AUDIT TRACKING -curation_audit_tracking_init -gonogo "curation_audit_tracking_init" +# SQL POST +# Rebuild the API and prepare the system for execution. +# Runs after migrations. +sql_post_fac_db +gonogo "sql_post" ##### # SEED COG/OVER TABLES @@ -49,7 +64,13 @@ gonogo "curation_audit_tracking_init" seed_cog_baseline gonogo "seed_cog_baseline" +##### +# CREATE STAFF USERS +# Prepares staff users for Django admin +python manage.py create_staffusers + ##### # LAUNCH THE APP # We will have died long ago if things didn't work. 
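+ # The node asset watcher is backgrounded; the Django dev server stays in the
+ # foreground and keeps the container alive.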
npm run dev & python manage.py runserver 0.0.0.0:8000 + diff --git a/backend/tools/api_standup.sh b/backend/tools/api_standup.sh deleted file mode 100644 index 23567bed9a..0000000000 --- a/backend/tools/api_standup.sh +++ /dev/null @@ -1,29 +0,0 @@ -source tools/util_startup.sh - -function api_standup { - startup_log "API_STANDUP" "BEGIN" - - # First create non-managed tables - startup_log "CREATE_API_ACCESS_TABLES" "BEGIN" - python manage.py create_api_access_tables - local d1=$? - startup_log "CREATE_API_ACCESS_TABLES" "END" - - # Bring the API back, possibly installing a new API - startup_log "CREATE_API_SCHEMA" "BEGIN" - python manage.py create_api_schema - local d2=$? - startup_log "CREATE_API_SCHEMA" "END" - - startup_log "CREATE_API_VIEWS" "BEGIN" - python manage.py create_api_views && - local d3=$? - startup_log "CREATE_API_VIEWS" "END" - - startup_log "API_STANDUP" "END" - - result=$((($d1 + $d2) + $d3)) - # If these are all zero, we're all good. - return $result -} - diff --git a/backend/tools/api_teardown.sh b/backend/tools/api_teardown.sh deleted file mode 100644 index 80292d3181..0000000000 --- a/backend/tools/api_teardown.sh +++ /dev/null @@ -1,20 +0,0 @@ -source tools/util_startup.sh - -function api_teardown { - startup_log "API_TEARDOWN" "BEGIN" - - startup_log "DROP_DEPRECATED_API_SCHEMA_AND_VIEWS" "BEGIN" - python manage.py drop_deprecated_api_schema_and_views - local d1=$? - startup_log "DROP_DEPRECATED_API_SCHEMA_AND_VIEWS" "END" - startup_log "DROP_API_SCHEMA" "BEGIN" - python manage.py drop_api_schema - local d2=$? - startup_log "DROP_API_SCHEMA" "END" - - startup_log "API_TEARDOWN" "END" - - result=$(($d1 + $d2)) - # If these are both zero, we're all good. - return $result -} diff --git a/backend/tools/build_indexes.sh b/backend/tools/build_indexes.sh new file mode 100644 index 0000000000..18452b393f --- /dev/null +++ b/backend/tools/build_indexes.sh @@ -0,0 +1,29 @@ +source tools/util_startup.sh + +function build_indexes { + startup_log "BUILD INDEXES" "BEGIN" + local base_path='dissemination/sql' + local location='indexes' + local which_db='fac-snapshot-db' + # This loops by index, so we can use two arrays. + for index in "${!db2_indexes[@]}" + do + local dbindex="${db2_indexes[index]}" + local required_table="${db2_indexes_required_tables[index]}" + # If the file to stand up the API exists... + if [ -f ${base_path}/${location}/$which_db/${dbindex}.sql ]; then + check_table_exists $FAC_SNAPSHOT_URI $required_table + local result=$? + echo "check_table_exists $required_table $result" + if [ $result -eq 0 ]; then + run_sql $FAC_SNAPSHOT_URI $base_path $location $which_db ${dbindex}.sql + gonogo "${dbindex}" + else + echo "API TABLE NOT FOUND/SKIPPED $required_table not found for $dbindex" + fi + else + echo "API FILE NOT FOUND/SKIPPED ${base_path}/${location}/$which_db/${dbindex}.sql" + fi + done + startup_log "BUILD INDEXES" "END" +} diff --git a/backend/tools/cgov_util_local_only.sh b/backend/tools/cgov_util_local_only.sh new file mode 100644 index 0000000000..fecc5550ff --- /dev/null +++ b/backend/tools/cgov_util_local_only.sh @@ -0,0 +1,26 @@ +source tools/util_startup.sh + +function cgov_util_local_only() { + + # Really, really only run this locally. Or in a GH runner. 
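+ # In short: wipe the public schema on fac-snapshot-db so the copy lands in a
+ # clean slate, then use cgov-util's db_to_db "initial" operation to copy
+ # fac-db into it, mimicking the deploy_backup step that runs before each
+ # cloud.gov deploy.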
+ + if [[ "${ENV}" == "LOCAL" || "${ENV}" == "TESTING" ]]; then + startup_log "CGOV_LOCAL_ONLY" "Making an initial 'backup'" + + $PSQL_EXE $FAC_SNAPSHOT_URI -c "DROP SCHEMA IF EXISTS public CASCADE" + gonogo "DROP PUBLIC in fac-snapshot-db" + $PSQL_EXE $FAC_SNAPSHOT_URI -c "CREATE SCHEMA public" + gonogo "CREATE PUBLIC fac-snapshot-db" + + # This is the first run. + startup_log "CGOV_LOCAL_ONLY" "Running cgov-util INITIAL." + $CGOV_UTIL_EXE db_to_db \ + --src_db fac-db \ + --dest_db fac-snapshot-db \ + --operation initial + + startup_log "CGOV_LOCAL_ONLY" "Done" + fi + + return 0 +} diff --git a/backend/tools/curation_audit_tracking_init.sh b/backend/tools/curation_audit_tracking_disable.sh similarity index 70% rename from backend/tools/curation_audit_tracking_init.sh rename to backend/tools/curation_audit_tracking_disable.sh index d30e66499a..e2597a5d05 100644 --- a/backend/tools/curation_audit_tracking_init.sh +++ b/backend/tools/curation_audit_tracking_disable.sh @@ -1,8 +1,7 @@ source tools/util_startup.sh -function curation_audit_tracking_init { +function curation_audit_tracking_disable() { startup_log "CURATION_AUDIT_TRACKING" "BEGIN" - python manage.py curation_audit_tracking --init python manage.py curation_audit_tracking --disable local result=$? startup_log "CURATION_AUDIT_TRACKING" "END" diff --git a/backend/tools/example_vcap.json b/backend/tools/example_vcap.json new file mode 100644 index 0000000000..ba6f776b66 --- /dev/null +++ b/backend/tools/example_vcap.json @@ -0,0 +1,201 @@ +{ + "s3": [ + { + "label": "s3", + "provider": "alpha-provider", + "plan": "basic", + "name": "backups", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDALPHA1", + "instance_name": "backups", + "binding_guid": "UUIDALPHA2", + "binding_name": null, + "credentials": { + "uri": "s3://ACCESSKEYIDALPHA:SECRETACCESSKEYALPHA@s3-us-gov-alpha-1.amazonaws.com/BROKEREDBUCKETALPHA", + "insecure_skip_verify": false, + "access_key_id": "ACCESSKEYIDALPHA", + "secret_access_key": "SECRETACCESSKEY+ALPHA", + "region": "us-gov-west-1", + "bucket": "BROKEREDBUCKETALPHA", + "endpoint": "s3-us-gov-alpha-1.amazonaws.com", + "fips_endpoint": "s3-fips.us-gov-alpha-1.amazonaws.com", + "additional_buckets": [] + }, + "syslog_drain_url": "https://ALPHA.drain.url", + "volume_mounts": [ + "no_mounts" + ] + }, + { + "label": "s3", + "provider": null, + "plan": "basic", + "name": "fac-private-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDBRAVO1", + "instance_name": "fac-private-s3", + "binding_guid": "UUIDBRAVO2", + "binding_name": null, + "credentials": { + "uri": "s3://ACCESSKEYIDBRAVO:SECRETACCESSKEYBRAVO@s3-us-gov-bravo-1.amazonaws.com/BROKEREDBUCKETBRAVO1", + "insecure_skip_verify": false, + "access_key_id": "ACCESSKEYIDBRAVO", + "secret_access_key": "SECRETACCESSKEYBRAVO", + "region": "us-gov-bravo-1", + "bucket": "BROKEREDBUCKETBRAVO1", + "endpoint": "s3-us-gov-bravo-1.amazonaws.com", + "fips_endpoint": "s3-fips.us-gov-bravo-1.amazonaws.com", + "additional_buckets": [ + "BROKEREDBUCKETBRAVO2" + ] + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "s3", + "provider": null, + "plan": "basic", + "name": "fac-public-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDCHARLIE1", + "instance_name": "fac-public-s3", + "binding_guid": "UUIDCHARLIE2", + "binding_name": null, + "credentials": { + "uri": "s3://ACCESSKEYIDCHARLIE:SECRETACCESSKEYCHARLIE@s3-us-gov-charlie-1.amazonaws.com/BROKEREDBUCKETCHARLIE", + 
"insecure_skip_verify": false, + "access_key_id": "ACCESSKEYIDCHARLIE", + "secret_access_key": "SECRETACCESSKEYCHARLIE", + "region": "us-gov-west-1", + "bucket": "BROKEREDBUCKETCHARLIE", + "endpoint": "s3-us-gov-charlie-1.amazonaws.com", + "fips_endpoint": "s3-fips.us-gov-charlie-1.amazonaws.com", + "additional_buckets": [] + }, + "syslog_drain_url": null, + "volume_mounts": [] + } + ], + "user-provided": [ + { + "label": "user-provided", + "name": "key-service", + "tags": [], + "instance_guid": "UUIDDELTA1", + "instance_name": "key-service", + "binding_guid": "UUIDDELTA2", + "binding_name": null, + "credentials": { + "DJANGO_SECRET_LOGIN_KEY": "DJANGOSECRETKEYDELTA", + "LOGIN_CLIENT_ID": "urn:gov:gsa:openidconnect.profiles:sp:sso:delta:jwt", + "SAM_API_KEY": "APIKEYDELTA", + "SECRET_KEY": "DJANGOSECRETKEYDELTA" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "user-provided", + "name": "https-proxy-creds", + "tags": [], + "instance_guid": "UUIDECHO1", + "instance_name": "https-proxy-creds", + "binding_guid": "UUIDECHO2", + "binding_name": null, + "credentials": { + "domain": "echo.egress-https-proxy.apps.internal", + "password": "PASSWORDECHO", + "port": "61443", + "protocol": "https", + "uri": "https://USERNAMEECHO:PASSWORDECHO@echo.egress-https-proxy.apps.internal:61443", + "username": "USERNAMEECHO" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "user-provided", + "name": "smtp-proxy-creds", + "tags": [], + "instance_guid": "UUIDFOXTROT1", + "instance_name": "smtp-proxy-creds", + "binding_guid": "UUIDFOXTROT2", + "binding_name": null, + "credentials": { + "domain": "echo.egress-smtp-proxy.apps.internal", + "port": "8080" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "user-provided", + "name": "newrelic-creds", + "tags": [ + "newrelic-creds" + ], + "instance_guid": "UUIDGOLF1", + "instance_name": "newrelic-creds", + "binding_guid": "UUIDGOLF2", + "binding_name": null, + "credentials": { + "NEW_RELIC_LICENSE_KEY": "LICENCEKEYGOLF", + "NEW_RELIC_LOGS_ENDPOINT": "https://golf.newrelic.com/log/v1" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "user-provided", + "name": "logdrain", + "tags": [], + "instance_guid": "UUIDHOTEL1", + "instance_name": "logdrain", + "binding_guid": "UUIDHOTEL2", + "binding_name": null, + "credentials": null, + "syslog_drain_url": "https://HOTELUSERNAME:HOTELPASSWORD@hotel.app.cloud.gov/?drain-type=all", + "volume_mounts": [] + } + ], + "aws-rds": [ + { + "label": "aws-rds", + "provider": null, + "plan": "medium-gp-psql", + "name": "db", + "tags": [ + "database", + "RDS" + ], + "instance_guid": "UUIDINDIA1", + "instance_name": "db", + "binding_guid": "UUIDINDIA2", + "binding_name": null, + "credentials": { + "db_name": "DBNAMEINDIA", + "host": "host.us-gov-india-1.rds.amazonaws.com", + "name": "DBNAMEINDIA", + "password": "PASSWORDINDIA", + "port": "5432", + "uri": "postgres://USERNAMEINDIA:PASSWORDINDIA@host.us-gov-india-1.rds.amazonaws.com:5432/DBNAMEINDIA", + "username": "USERNAMEINDIA" + }, + "syslog_drain_url": null, + "volume_mounts": [] + } + ] +} diff --git a/backend/tools/setup_cgov_env.sh b/backend/tools/setup_cgov_env.sh index f944100f8b..60a785052f 100644 --- a/backend/tools/setup_cgov_env.sh +++ b/backend/tools/setup_cgov_env.sh @@ -1,7 +1,38 @@ source tools/util_startup.sh +# Aliases need to be outside of function scope + function setup_cgov_env { - set -e + startup_log "CGOV_ENV" "We are in a cloud.gov envirnoment." 
+ + # https://stackoverflow.com/questions/48712545/break-jq-query-string-into-lines + # jq is fine with line breaks in strings. Just don't escape them. + # Makes long queries more readable. Maybe. + export AWS_PRIVATE_BUCKET_NAME=$(echo $VCAP_SERVICES \ + | jq '.s3 + | map(select(.instance_name + | contains("fac-private-s3"))) + | .[] .credentials.bucket') + export AWS_PUBLIC_BUCKET_NAME=$(echo $VCAP_SERVICES \ + | jq '.s3 + | map(select(.instance_name + | contains("fac-public-s3"))) + | .[] .credentials.bucket') + + get_aws_s3 "fac-private-s3" "access_key_id" + export AWS_PRIVATE_ACCESS_KEY_ID=$_GET_AWS_RESULT + get_aws_s3 "fac-private-s3" "secret_access_key" + export AWS_PRIVATE_SECRET_ACCESS_KEY=$_GET_AWS_RESULT + get_aws_s3 "fac-private-s3" "endpoint" + export AWS_S3_PRIVATE_ENDPOINT=$_GET_AWS_RESULT + + get_aws_s3 "fac-public-s3" "access_key_id" + export AWS_PUBLIC_ACCESS_KEY_ID=$_GET_AWS_RESULT + get_aws_s3 "fac-public-s3" "secret_access_key" + export AWS_PUBLIC_SECRET_ACCESS_KEY=$_GET_AWS_RESULT + get_aws_s3 "fac-public-s3" "endpoint" + export AWS_S3_PUBLIC_ENDPOINT=$_GET_AWS_RESULT + export SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt @@ -38,5 +69,15 @@ function setup_cgov_env { export NEW_RELIC_HOST="gov-collector.newrelic.com" # https://docs.newrelic.com/docs/apm/agents/python-agent/configuration/python-agent-configuration/#proxy export NEW_RELIC_PROXY_HOST="$https_proxy" + + # For database work + export FAC_DB_URI="$(echo "$VCAP_SERVICES" | jq --raw-output --arg service_name "fac-db" ".[][] | select(.name == \$service_name) | .credentials.uri")" + export FAC_SNAPSHOT_URI="$(echo "$VCAP_SERVICES" | jq --raw-output --arg service_name "fac-snapshot-db" ".[][] | select(.name == \$service_name) | .credentials.uri")" + # https://stackoverflow.com/questions/37072245/check-return-status-of-psql-command-in-unix-shell-scripting + export PSQL_EXE='/home/vcap/deps/0/apt/usr/lib/postgresql/*/bin/psql --single-transaction -v ON_ERROR_STOP=on' + export PSQL_EXE_NO_TXN='/home/vcap/deps/0/apt/usr/lib/postgresql/*/bin/psql -v ON_ERROR_STOP=on' + + export SLING_EXE='/home/vcap/app/sling' + return 0 } diff --git a/backend/tools/setup_local_env.sh b/backend/tools/setup_local_env.sh index 5cf30af2f0..2c9da76210 100644 --- a/backend/tools/setup_local_env.sh +++ b/backend/tools/setup_local_env.sh @@ -4,14 +4,126 @@ function setup_local_env { if [[ "${ENV}" == "LOCAL" || "${ENV}" == "TESTING" ]]; then startup_log "LOCAL_ENV" "We are in a local envirnoment." - export AWS_PRIVATE_ACCESS_KEY_ID=longtest - export AWS_PRIVATE_SECRET_ACCESS_KEY=longtest - export AWS_S3_PRIVATE_ENDPOINT="http://minio:9000" - mc alias set myminio "${AWS_S3_PRIVATE_ENDPOINT}" minioadmin minioadmin + + # Load a fake VCAP_SERVICES file into the environment variable, + # so we can mimic the cloud.gov setup. + export VCAP_SERVICES=$(cat config/vcap_services_for_containers.json) + check_env_var_not_empty "VCAP_SERVICES" + + # export AWS_PUBLIC_ACCESS_KEY_ID="singleauditclearinghouse" + # export AWS_PUBLIC_SECRET_ACCESS_KEY="singleauditclearinghouse" + # export AWS_S3_PUBLIC_ENDPOINT="http://minio:9000" + + + # https://stackoverflow.com/questions/48712545/break-jq-query-string-into-lines + # jq is fine with line breaks in strings. Just don't escape them. + # Makes long queries more readable. Maybe. 
+ + # export AWS_PUBLIC_BUCKET_NAME="fac-public-s3" + # export AWS_PRIVATE_BUCKET_NAME="fac-private-s3" + + export AWS_PRIVATE_BUCKET_NAME=$(echo $VCAP_SERVICES \ + | jq --raw-output '.s3 + | map(select(.instance_name + | contains("fac-private-s3"))) + | .[] .credentials.bucket') + check_env_var_not_empty "AWS_PRIVATE_BUCKET_NAME" + + export AWS_PUBLIC_BUCKET_NAME=$(echo $VCAP_SERVICES \ + | jq --raw-output '.s3 + | map(select(.instance_name + | contains("fac-public-s3"))) + | .[] .credentials.bucket') + + + # export AWS_PRIVATE_ACCESS_KEY_ID="singleauditclearinghouse" + # export AWS_PRIVATE_SECRET_ACCESS_KEY="singleauditclearinghouse" + # export AWS_S3_PRIVATE_ENDPOINT="http://minio:9000" + + get_aws_s3 "fac-private-s3" "access_key_id" + export AWS_PRIVATE_ACCESS_KEY_ID=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_PRIVATE_ACCESS_KEY_ID" + + get_aws_s3 "fac-private-s3" "secret_access_key" + export AWS_PRIVATE_SECRET_ACCESS_KEY=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_PRIVATE_SECRET_ACCESS_KEY" + + get_aws_s3 "fac-private-s3" "endpoint" + export AWS_S3_PRIVATE_ENDPOINT=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_S3_PRIVATE_ENDPOINT" + + get_aws_s3 "fac-public-s3" "access_key_id" + export AWS_PUBLIC_ACCESS_KEY_ID=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_PUBLIC_ACCESS_KEY_ID" + + get_aws_s3 "fac-public-s3" "secret_access_key" + export AWS_PUBLIC_SECRET_ACCESS_KEY=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_PUBLIC_SECRET_ACCESS_KEY" + + get_aws_s3 "fac-public-s3" "endpoint" + export AWS_S3_PUBLIC_ENDPOINT=$_GET_AWS_RESULT + check_env_var_not_empty "AWS_S3_PUBLIC_ENDPOINT" + + #export MC_HOST_=https://::@ + export MC_HOST_myminio="http://${AWS_PRIVATE_ACCESS_KEY_ID}:${AWS_PRIVATE_SECRET_ACCESS_KEY}@minio:9000" + # mc alias set myminio ${AWS_S3_PRIVATE_ENDPOINT} ${AWS_PRIVATE_ACCESS_KEY_ID} ${AWS_PRIVATE_ACCESS_KEY_ID} + # until (mc config host add myminio $AWS_PRIVATE_ENDPOINT singleauditclearinghouse singleauditclearinghouse) do echo '...waiting...' && sleep 1; done; # Do nothing if the bucket already exists. # https://min.io/docs/minio/linux/reference/minio-mc/mc-mb.html - mc mb --ignore-existing myminio/gsa-fac-private-s3 - mc admin user svcacct add --access-key="${AWS_PRIVATE_ACCESS_KEY_ID}" --secret-key="${AWS_PRIVATE_SECRET_ACCESS_KEY}" myminio minioadmin + mc mb --ignore-existing myminio/${AWS_PUBLIC_BUCKET_NAME} + mc mb --ignore-existing myminio/${AWS_PRIVATE_BUCKET_NAME} + + # MCJ 20241016 FIXME: Is this even needed locally? I don't think so. + # mc admin user svcacct add \ + # --access-key="${AWS_PRIVATE_ACCESS_KEY_ID}" \ + # --secret-key="${AWS_PRIVATE_SECRET_ACCESS_KEY}" \ + # myminio minioadmin + + # For database work + export FAC_DB_URI=${DATABASE_URL} #?sslmode=disable + export FAC_SNAPSHOT_URI=${SNAPSHOT_URL} + export PSQL_EXE='psql --single-transaction -v ON_ERROR_STOP=on' + export PSQL_EXE_NO_TXN='psql -v ON_ERROR_STOP=on' + + + # Locally, we need to pull in sling. + # In production, it gets pulled in via the build/deploy process. 
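+ # (Note: this fetches the latest sling release; if local runs need to be
+ # reproducible, the URL could be pinned to a specific version in the same
+ # way CGOV_VERSION pins cgov-util below.)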
+ pushd /tmp + curl -LO 'https://github.com/slingdata-io/sling-cli/releases/latest/download/sling_linux_amd64.tar.gz' + tar xf sling_linux_amd64.tar.gz + rm -f sling_linux_amd64.tar.gz + chmod +x sling + mv sling /bin/sling + popd + + # And we need cgov-util + pushd /tmp + local CGOV_VERSION=v0.1.9 + curl -L -O https://github.com/GSA-TTS/fac-backup-utility/releases/download/${CGOV_VERSION}/gov.gsa.fac.cgov-util-${CGOV_VERSION}-linux-amd64.tar.gz + tar xvzf gov.gsa.fac.cgov-util-${CGOV_VERSION}-linux-amd64.tar.gz gov.gsa.fac.cgov-util + chmod 755 gov.gsa.fac.cgov-util + mv gov.gsa.fac.cgov-util /bin/cgov-util + popd + + export SLING_EXE='/bin/sling' + export CGOV_UTIL_EXE='/bin/cgov-util' + + show_env_var "AWS_S3_PRIVATE_ENDPOINT" + + $SLING_EXE conns set \ + BULK_DATA_EXPORT \ + type=s3 \ + bucket="${AWS_PRIVATE_BUCKET_NAME}" \ + access_key_id="${AWS_PRIVATE_ACCESS_KEY_ID}" \ + secret_access_key="${AWS_PRIVATE_SECRET_ACCESS_KEY}" \ + endpoint="${AWS_S3_PRIVATE_ENDPOINT}" + $SLING_EXE conns test BULK_DATA_EXPORT + gonogo "local_minio_conns_test" + + # We need a config.json in the directory we are running + # things from (or PWD). + cp util/load_public_dissem_data/data/config.json . + gonogo "local copy of config for cgov-util" return 0 fi; } diff --git a/backend/tools/sling_bulk_export.sh b/backend/tools/sling_bulk_export.sh new file mode 100644 index 0000000000..2e97e9cc51 --- /dev/null +++ b/backend/tools/sling_bulk_export.sh @@ -0,0 +1,12 @@ +source tools/util_startup.sh + +function sling_bulk_export() { + startup_log "SLING_BULK_EXPORT" "Slinging data CSVs" + + SLING_ALLOW_EMPTY=1 + $SLING_EXE run -r dissemination/sql/sling/bulk_data_export/public_data_v1_0_0_single_csv.yaml + gonogo "sling_bulk_export" + + startup_log "SLING_BULK_EXPORT" "Done" + return 0 +} diff --git a/backend/tools/sling_first_run.SKIP b/backend/tools/sling_first_run.SKIP new file mode 100644 index 0000000000..8e997a5df4 --- /dev/null +++ b/backend/tools/sling_first_run.SKIP @@ -0,0 +1,49 @@ +source tools/util_startup.sh + +function sling_first_run() { + startup_log "SLING_FIRST_RUN" "Slinging data to fac-snapshot if needed" + # check_table_exists might return 1. + # It is possible set -e is kicking us out when the function + # returns an error? So, try set +e. + set +e + # The deploy action does a DB->DB backup with cgov-util. + # This should only ever actually run when testing locally. + # In the cloud environment, it will skip the actul run. + check_table_exists $FAC_SNAPSHOT_URI 'public' 'dissemination_general' + local is_general_table=$FUNCTION_RESULT + if [ $is_general_table -ne 0 ]; then + # This is the first run. + startup_log "SLING_FIRST_RUN" "Running cgov-util INITIAL." + $CGOV_UTIL_EXE db_to_db \ + --src_db fac-db \ + --dest_db fac-snapshot-db \ + --operation initial + fi + + # If the metadata table exists, it means sling has run to completion. + check_table_exists $FAC_SNAPSHOT_URI 'public_data_v1_0_0' 'metadata' + local is_metadata_table=$FUNCTION_RESULT + echo "SLING_FIRST_RUN public_data_v1_0_0.metadata = $is_metadata_table" + + # We need to load some functions for sling to complete, because + # we use those functions as part of the metadata table generation. + local base_path='dissemination/sql' + local location='sling' + + # Only run sling if the tables in the secondary DB do not exist. + if [ $is_metadata_table -ne 0 ]; then + SLING_ALLOW_EMPTY=1 + startup_log "SLING_FIRST_RUN" "API tables don't exist; running sling." 
+ $SLING_EXE run -r dissemination/sql/sling/public_data_v1_0_0/public_data_v1_0_0.yaml + gonogo "sling public data for API tables" + $SLING_EXE run -r dissemination/sql/sling/public_data_v1_0_0/suppressed_data_v1_0_0.yaml + gonogo "sling tribal data for API tables" + $SLING_EXE run -r dissemination/sql/sling/public_data_v1_0_0/public_metadata_v1_0_0.yaml + gonogo "sling metadata table" + startup_log "SLING_FIRST_RUN" "Successfully ran sling to generate tables." + else + startup_log "SLING_FIRST_RUN" "API tables exist; skipping sling." + fi + startup_log "SLING_FIRST_RUN" "Done" + return 0 +} diff --git a/backend/tools/sql_pre_post.sh b/backend/tools/sql_pre_post.sh new file mode 100644 index 0000000000..c4503468a8 --- /dev/null +++ b/backend/tools/sql_pre_post.sh @@ -0,0 +1,47 @@ +source tools/util_startup.sh + +function run_sql_files { + local db="$1" + local tag="$2" + local pre_post="$3" + local which_db="NO_DB_SELECTED" + + if [ $db == $FAC_DB_URI ]; then + which_db="fac-db" + fi; + if [ $db == $FAC_SNAPSHOT_URI ]; then + which_db="fac-snapshot-db" + fi; + + startup_log $tag "BEGIN" + # Loop by index. + for file in `ls dissemination/sql/$which_db/$pre_post/*.sql`; + do + # run_sql has an explicit go/no-go built-in. + run_sql $db $file + done + startup_log $tag "END" + return 0 +} + +function sql_pre_fac_db { + run_sql_files $FAC_DB_URI "SQL_PRE" "pre" +} + +function sql_pre_fac_snapshot_db { + run_sql_files $FAC_SNAPSHOT_URI "SQL_PRE" "pre" +} + +function sql_post_fac_db { + run_sql_files $FAC_DB_URI "SQL_POST" "post" +} + +function sql_post_fac_snapshot_db { + run_sql_files $FAC_SNAPSHOT_URI "SQL_POST" "post" +} + +function vacuum_snapshot_db { + # Vacuum things when we're done. + # Cannot run inside a transaction. + $PSQL_EXE_NO_TXN $FAC_SNAPSHOT_URI -c "VACUUM (FULL, VERBOSE, ANALYZE);" +} diff --git a/backend/tools/util_startup.sh b/backend/tools/util_startup.sh index ca54205ab9..a4c37ac346 100644 --- a/backend/tools/util_startup.sh +++ b/backend/tools/util_startup.sh @@ -1,7 +1,9 @@ +source tools/variables.sh + function startup_log { local tag="$1" local msg="$2" - echo STARTUP $tag $msg + echo "STARTUP" "$tag" "$msg" } # gonogo @@ -9,8 +11,95 @@ function startup_log { function gonogo { if [ $? -eq 0 ]; then startup_log "STARTUP_CHECK" "$1 PASS" + return 0 else startup_log "STARTUP_CHECK" "$1 FAIL" - exit -1 + exit 1 + fi +} + +# 2024-10-10T12:28:29.61-0400 [APP/PROC/WEB/0] OUT CHECK_TABLE_EXISTS START: public.dissemination_general +# 2024-10-10T12:28:29.65-0400 [APP/PROC/WEB/0] OUT CHECK_TABLE_EXISTS END: public.dissemination_general = t +# 2024-10-10T12:28:29.65-0400 [APP/PROC/WEB/0] OUT CHECK_TABLE_EXISTS: public_data_v1_0_0.metadata +# 2024-10-10T12:28:29.68-0400 [APP/PROC/WEB/0] OUT CHECK_TABLE_EXISTS public_data_v1_0_0.metadata: f +# 2024-10-10T12:28:29.68-0400 [APP/PROC/WEB/0] OUT Exit status 1 +# 2024-10-10T12:28:29.68-0400 [CELL/SSHD/0] OUT Exit status 0 + +function check_table_exists() { + local db_uri="$1" + local schema="$2" + local table="$3" + + echo "CHECK_TABLE_EXISTS START: $schema.$table" + # >/dev/null 2>&1 + # The qtAX incantation lets us pass the PSQL result value back to bash. + result=`$PSQL_EXE "$db_uri" -qtAX -c "SELECT EXISTS (SELECT FROM pg_tables WHERE schemaname = '$schema' AND tablename = '$table');"` + # Flip TRUE to a 0, because UNIX considers a 0 exit code to be good. 
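+ # Callers read the answer from $FUNCTION_RESULT rather than the return code,
+ # e.g. (as in tools/sling_first_run.SKIP):
+ #   check_table_exists $FAC_SNAPSHOT_URI 'public' 'dissemination_general'
+ #   if [ $FUNCTION_RESULT -ne 0 ]; then echo "table missing"; fi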
+ if [ "$result" = "t" ]; then + echo "CHECK_TABLE_EXISTS END: $schema.$table = 0" + FUNCTION_RESULT=0 + else + echo "CHECK_TABLE_EXISTS END: $schema.$table = 1" + FUNCTION_RESULT=1 + fi + return 0 +} + +function check_schema_exists () { + local db_uri="$1" + local schema_name="$2" + echo "CHECK_SCHEMA_EXISTS START: $schema_name" + result=`$PSQL_EXE $db_uri -qtAX -c "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = '$schema_name');"` + # Flip TRUE to a 0, because UNIX considers a 0 exit code to be good. + if [ "$result" = "t" ]; then + echo "CHECK_SCHEMA_EXISTS END: $schema_name = 0" + FUNCTION_RESULT=0 + else + echo "CHECK_SCHEMA_EXISTS END: $schema_name = 1" + FUNCTION_RESULT=1 fi + return 0 +} + +function run_sql () { + local db_uri="$1" + local path="$2" + echo "BEGIN run_sql < $path" + if [[ "$file" == *"notxn"* ]]; then + $PSQL_EXE_NO_TXN $db_uri < $path; + gonogo "GONOGO run_sql < $path" + else + $PSQL_EXE $db_uri < $path; + gonogo "GONOGO run_sql < $path" + fi +} + +export _GET_AWS_RESULT="NONE" + +function get_aws_s3() { + local bucket="$1" + local key="$2" + _GET_AWS_RESULT=$(echo $VCAP_SERVICES | \ + jq --arg bucket "$bucket" '.s3 | map(select(.instance_name==$bucket))' | \ + jq .[] | \ + jq --raw-output --arg key "$key" '.credentials | .[$key]') + return 0 +} + +function check_env_var_not_empty() { + local var_name="$1" + local var_value="${!var_name}" + if [ -z "${var_value}" ]; + then + echo "CHECK_ENV_VAR ${var_name} has value '${var_value}'. Exiting."; + exit 255; + else + echo "CHECK_ENV_VAR ${var_name} is OK." + fi +} + +function show_env_var() { + local var_name="$1" + local var_value="${!var_name}" + echo "SHOW_ENV_VAR ${var_name} has value '${var_value}'."; } diff --git a/backend/tools/variables.sh b/backend/tools/variables.sh new file mode 100644 index 0000000000..f28b2d998c --- /dev/null +++ b/backend/tools/variables.sh @@ -0,0 +1 @@ +export FUNCTION_RESULT="DAS DEFAULT" diff --git a/backend/users/admin.py b/backend/users/admin.py index 64261d48eb..e126e43597 100644 --- a/backend/users/admin.py +++ b/backend/users/admin.py @@ -17,20 +17,34 @@ class PermissionAdmin(admin.ModelAdmin): @admin.register(User) class UserAdmin(admin.ModelAdmin): - list_display = ["email", "can_read_tribal", "last_login", "date_joined"] - list_filter = ["is_staff", "is_superuser"] - exclude = ["groups", "user_permissions", "password"] + list_display = [ + "email", + "can_read_tribal", + "last_login", + "date_joined", + "assigned_groups", + ] + list_filter = ["is_staff", "is_superuser", "groups"] + exclude = ["user_permissions", "password"] readonly_fields = ["date_joined", "last_login"] search_fields = ("email", "username") def can_read_tribal(self, obj): return _can_read_tribal(obj) + def assigned_groups(self, obj): + return ", ".join([g.name for g in obj.groups.all()]) + @admin.register(UserPermission) class UserPermissionAdmin(admin.ModelAdmin): list_display = ["user", "email", "permission"] search_fields = ("email", "permission", "user") + fields = ["email", "permission"] + + def save_model(self, request, obj, form, change): + obj.user = User.objects.get(email=obj.email) + super().save_model(request, obj, form, change) @admin.register(StaffUserLog) @@ -57,8 +71,7 @@ def has_delete_permission(self, request, obj=None): class StaffUserAdmin(admin.ModelAdmin): list_display = [ "staff_email", - "added_by_email", - "date_added", + "privilege", ] fields = [ "staff_email", @@ -91,3 +104,9 @@ def has_add_permission(self, request, obj=None): def 
has_delete_permission(self, request, obj=None): return request.user.is_superuser + + def privilege(self, obj): + user = User.objects.get(email=obj.staff_email, is_staff=True) + if user.is_superuser: + return "Superuser" + return ", ".join([g.name for g in user.groups.all()]) diff --git a/backend/users/management/commands/create_staffusers.py b/backend/users/management/commands/create_staffusers.py new file mode 100644 index 0000000000..0a77188d5a --- /dev/null +++ b/backend/users/management/commands/create_staffusers.py @@ -0,0 +1,115 @@ +from django.conf import settings +from django.contrib.auth import get_user_model +from django.contrib.auth.models import Group, Permission +from django.core.management.base import BaseCommand +from django.db import transaction +from users.models import StaffUser +import json +import logging +import os + +logger = logging.getLogger(__name__) +User = get_user_model() + + +class Command(BaseCommand): + + def handle(self, *args, **kwargs): + """Create a group with readonly permissions.""" + group_readonly, created = Group.objects.get_or_create(name="Read-only") + readonly_codenames = [ + "view_access", + "view_deletedaccess", + "view_singleauditchecklist", + "view_sacvalidationwaiver", + "view_ueivalidationwaiver", + "view_additionalein", + "view_additionaluei", + "view_captext", + "view_federalaward", + "view_findingtext", + "view_finding", + "view_general", + "view_note", + "view_passthrough", + "view_secondaryauditor", + "view_cognizantassignment", + "view_cognizantbaseline", + "view_staffuser", + "view_userpermission", + "view_tribalapiaccesskeyids", + ] + group_readonly.permissions.clear() + for code in readonly_codenames: + group_readonly.permissions.add(Permission.objects.get(codename=code)) + group_readonly.save() + + """Create a group with helpdesk permissions.""" + group_helpdesk, created = Group.objects.get_or_create(name="Helpdesk") + helpdesk_codenames = readonly_codenames + [ + "add_userpermission", + "change_userpermission", + "delete_userpermission", + "add_tribalapiaccesskeyids", + "change_tribalapiaccesskeyids", + "delete_tribalapiaccesskeyids", + "add_sacvalidationwaiver", + "add_ueivalidationwaiver", + "add_cognizantassignment", + ] + group_helpdesk.permissions.clear() + for code in helpdesk_codenames: + group_helpdesk.permissions.add(Permission.objects.get(codename=code)) + group_helpdesk.save() + + # read in staffusers JSON. + user_list = None + with open( + os.path.join(settings.BASE_DIR, "config/staffusers.json"), "r" + ) as file: + user_list = json.load(file) + + if user_list: + + # clear superuser privileges. + superusers = User.objects.filter(is_superuser=True) + for superuser in superusers: + superuser.is_superuser = False + superuser.save() + + # clear existing staff users. + StaffUser.objects.all().delete() + + for role in user_list: + for email in user_list[role]: + + # create staff user for each role. + with transaction.atomic(): + + StaffUser( + staff_email=email, + ).save() + + # attempt to update the user. + try: + user = User.objects.get(email=email, is_staff=True) + + user.groups.clear() + match role: + case "readonly": + user.groups.add(group_readonly) + case "helpdesk": + user.groups.clear() + user.groups.add(group_helpdesk) + case "superuser": + user.is_superuser = True + + user.save() + logger.info(f"Synced {email} to a StaffUser role.") + + # for whatever reason, this failed. Revert staffuser creation. 
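+ # (The rollback also undoes the StaffUser row created above; the email will
+ # be picked up by a later run of this command, which run.sh invokes at every
+ # startup, once the person has logged in and a User row exists.)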
+ except User.DoesNotExist: + transaction.set_rollback(True) + logger.warning( + f"StaffUser not created for {email}, they have not logged in yet." + ) diff --git a/backend/users/models.py b/backend/users/models.py index de502a92db..e0c0534b1f 100644 --- a/backend/users/models.py +++ b/backend/users/models.py @@ -70,6 +70,7 @@ def delete(self, *args, **kwargs): try: user = User.objects.get(email=self.staff_email) user.is_staff = False + user.is_superuser = False user.save() except User.DoesNotExist: pass # silently ignore. Nothing to do. diff --git a/backend/tools/__init__.py b/backend/util/__init__.py similarity index 100% rename from backend/tools/__init__.py rename to backend/util/__init__.py diff --git a/backend/util/api_perf_test/perf.py b/backend/util/api_perf_test/perf.py new file mode 100644 index 0000000000..9264fd6cd2 --- /dev/null +++ b/backend/util/api_perf_test/perf.py @@ -0,0 +1,259 @@ +import requests +import os +import time +from pprint import pprint +import math + +URI = "http://localhost:3000" + +# GET {{scheme}}://{{apiUrl}}/general?report_id=eq.2021-12-CENSUS-0000250449 +# authorization: {{authorization}} +# x-api-user-id: {{xApiUserId}} +# accept-profile: api_v2_0_0 +# Accept: application/vnd.pgrst.plan + + +def fetch_fa_exp(api_version): + total_cost = 0 + for offset in range(0, 4000000, 20000): + print(f"fetch_fa_exp api {api_version} offset {offset}") + query = f"{URI}/federal_awards?limit=20000&offset={offset}" + headers = { + "accept-profile": api_version, + "accept": "application/vnd.pgrst.plan+json", + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + + resp = requests.get(query, headers=headers, timeout=60) + # We get back a list of one plan, and we want the total cost. + total_cost += resp.json()[0]["Plan"]["Total Cost"] + return math.floor(total_cost) + + +def fetch_fa_by_year_exp(api_version): + total_cost = 0 + for year in range(16, 24): + audit_year = f"20{year:02}" + for offset in range(0, 1000000, 20000): + print( + f"fetch_fa_by_year_exp api {api_version} ay {audit_year} offset {offset}" + ) + query = f"{URI}/federal_awards?audit_year=eq.{audit_year}&limit=20000&offset={offset}" + headers = { + "accept-profile": api_version, + "accept": "application/vnd.pgrst.plan+json", + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + + resp = requests.get(query, headers=headers, timeout=60) + # We get back a list of one plan, and we want the total cost. + total_cost += resp.json()[0]["Plan"]["Total Cost"] + return math.floor(total_cost) + + +def fetch_fa_time(api_version): + total_cost = 0 + for offset in range(0, 4000000, 20000): + query = f"{URI}/federal_awards?limit=20000&offset={offset}" + headers = { + "accept-profile": api_version, + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + t0 = time.time() + requests.get(query, headers=headers, timeout=60) + t1 = time.time() + # We get back a list of one plan, and we want the total cost. 
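+            # (Unlike the EXPLAIN variants above, total_cost here accumulates elapsed
+            # wall-clock seconds per request rather than a planner cost.)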
+ total_cost += t1 - t0 + print(f"fetch_fa_time api {api_version} offset {offset} time {t1 - t0}") + return math.floor(total_cost) + + +def fetch_fa_time_by_year(api_version): + total_cost = 0 + for year in range(16, 24): + for offset in range(0, 1000000, 20000): + audit_year = f"20{year:02}" + query = f"{URI}/federal_awards?audit_year=eq.{audit_year}&limit=20000&offset={offset}" + headers = { + "accept-profile": api_version, + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + t0 = time.time() + requests.get(query, headers=headers, timeout=60) + t1 = time.time() + # We get back a list of one plan, and we want the total cost. + total_cost += t1 - t0 + print( + f"fetch_fa_time_by_year api {api_version} ay {audit_year} offset {offset} time {t1 - t0}" + ) + return math.floor(total_cost) + + +def fetch_fa_batches_exp(): + total_cost = 0 + for batch_no in range(0, 235): + query = f"{URI}/federal_awards?batch_number=eq.{batch_no}" + headers = { + "accept-profile": "api_v2_0_0", + "accept": "application/vnd.pgrst.plan+json", + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + + resp = requests.get(query, headers=headers, timeout=60) + # We get back a list of one plan, and we want the total cost. + total_cost += resp.json()[0]["Plan"]["Total Cost"] + return math.floor(total_cost) + + +def fetch_fa_batches_time(): + total_cost = 0 + for batch_no in range(0, 235): + print(f"batch number: {batch_no}") + query = f"{URI}/federal_awards?batch_number=eq.{batch_no}" + headers = { + "accept-profile": "api_v2_0_0", + "x-api-user-id": os.getenv("API_KEY_ID"), + "authorization": f"bearer {os.getenv('CYPRESS_API_GOV_JWT')}", + } + t0 = time.time() + requests.get(query, headers=headers, timeout=60) + t1 = time.time() + # We get back a list of one plan, and we want the total cost. + total_cost += t1 - t0 + return math.floor(total_cost) + + +def make_ratios(d1, d2): + min = math.inf + for k, v in d1.items(): + if v < min: + min = v + for k, v in d1.items(): + d2[k] = round(v / min, 2) + + +if __name__ == "__main__": + results1: dict = {} + results2: dict = {} + results3: dict = {} + results4: dict = {} + + results1["api110_by_year"] = fetch_fa_by_year_exp("api_v1_1_0") + results1["ap110"] = fetch_fa_exp("api_v1_1_0") + results1["public100"] = fetch_fa_exp("api_v2_0_0") + results1["public100_batches"] = fetch_fa_batches_exp() + results1["public100_by_year"] = fetch_fa_by_year_exp("api_v2_0_0") + + print("Running timing tests... ~5m") + + results3["public100_by_year"] = fetch_fa_time_by_year("api_v2_0_0") + results3["public100"] = fetch_fa_time("api_v2_0_0") + results3["public100_batches"] = fetch_fa_batches_time() + results3["ap110"] = fetch_fa_time("api_v1_1_0") + results3["ap110_by_year"] = fetch_fa_time_by_year("api_v1_1_0") + + make_ratios(results1, results2) + make_ratios(results3, results4) + results1["desc"] = "EXPLAIN raw" + results2["desc"] = "EXPLAIN ratio" + results3["desc"] = "TIME raw" + results4["desc"] = "TIME ratio" + # results1 is the raw EXPLAIN cost of downloading all of federal_awards + pprint(results1) + # results2 is the ratio + pprint(results2) + # results3 is the raw timing + pprint(results3) + # results4 is the ratio for timings + pprint(results4) + +# Where there is no index on the audit_year column. +# by_year is worse. 
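+# Illustrative only, with made-up numbers: make_ratios divides every value by the
+# smallest value in the dict, so {'a': 200, 'b': 50} becomes {'a': 4.0, 'b': 1.0}.
+# The cheapest strategy therefore always shows up as 1.0 in the ratio dicts below.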
+# {'ap110': 36737858, +# 'api110_by_year': 87880072, +# 'public100': 36305424, +# 'public100_batches': 1176467, +# 'public100_by_year': 11407901} +# {'ap110': 31, +# 'api110_by_year': 74, +# 'public100': 30, +# 'public100_batches': 1, +# 'public100_by_year': 9} +# {'ap110': 188, +# 'ap110_by_year': 304, +# 'public100': 62, +# 'public100_batches': 29, +# 'public100_by_year': 40} +# {'ap110': 6, +# 'ap110_by_year': 10, +# 'public100': 2, +# 'public100_batches': 1, +# 'public100_by_year': 1} + + +# {'ap110': 36737858, +# 'api110_by_year': 87880072, +# 'desc': 'EXPLAIN raw', +# 'public100': 36304899, +# 'public100_batches': 1179458, +# 'public100_by_year': 11407650} +# {'ap110': 31.15, +# 'api110_by_year': 74.51, +# 'desc': 'EXPLAIN ratio', +# 'public100': 30.78, +# 'public100_batches': 1.0, +# 'public100_by_year': 9.67} +# {'ap110': 193, +# 'ap110_by_year': 312, +# 'desc': 'TIME raw', +# 'public100': 63, +# 'public100_batches': 29, +# 'public100_by_year': 45} +# {'ap110': 6.66, +# 'ap110_by_year': 10.76, +# 'desc': 'TIME ratio', +# 'public100': 2.17, +# 'public100_batches': 1.0, +# 'public100_by_year': 1.55} + +# For local testing. +# sqlite-rest serve --auth-token-file test.token --security-allow-table general,federal_awards --db-dsn public-data.sqlite +# export AUTH_TOKEN="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.zJrV44Lhr1Ck4vg1dMnldql0adLgut241jo0FbFXMlI" +# echo -n "topsecret" > test.token + +# SQLite, no indexes +# {'desc': 'EXPLAIN raw'} +# {'desc': 'EXPLAIN ratio'} +# {'ap110': 70, +# 'ap110_by_year': 133, +# 'desc': 'TIME raw', +# 'public100': 69, +# 'public100_batches': 123, +# 'public100_by_year': 130} +# {'ap110': 1.01, +# 'ap110_by_year': 1.93, +# 'desc': 'TIME ratio', +# 'public100': 1.0, +# 'public100_batches': 1.78, +# 'public100_by_year': 1.88} + +# After applying same indexes as PG has. +# {'desc': 'EXPLAIN raw'} +# {'desc': 'EXPLAIN ratio'} +# {'ap110': 70, +# 'ap110_by_year': 72, +# 'desc': 'TIME raw', +# 'public100': 71, +# 'public100_batches': 69, +# 'public100_by_year': 71} +# {'ap110': 1.01, +# 'ap110_by_year': 1.04, +# 'desc': 'TIME ratio', +# 'public100': 1.03, +# 'public100_batches': 1.0, +# 'public100_by_year': 1.03} diff --git a/backend/tools/collect_scan_metrics.py b/backend/util/collect_scan_metrics.py similarity index 100% rename from backend/tools/collect_scan_metrics.py rename to backend/util/collect_scan_metrics.py diff --git a/backend/tools/generate_xlsx_files.py b/backend/util/generate_xlsx_files.py similarity index 100% rename from backend/tools/generate_xlsx_files.py rename to backend/util/generate_xlsx_files.py diff --git a/backend/util/load_public_dissem_data/.gitignore b/backend/util/load_public_dissem_data/.gitignore new file mode 100644 index 0000000000..6c05ad4054 --- /dev/null +++ b/backend/util/load_public_dissem_data/.gitignore @@ -0,0 +1,7 @@ +*.csv +*.zip +*.tar +*.gz +db_dissem_dump* +__MACOSX +data/sling.yaml diff --git a/backend/util/load_public_dissem_data/Dockerfile b/backend/util/load_public_dissem_data/Dockerfile new file mode 100644 index 0000000000..c5d84055de --- /dev/null +++ b/backend/util/load_public_dissem_data/Dockerfile @@ -0,0 +1,32 @@ +FROM debian:bookworm + +RUN apt-get update && \ + apt-get -y upgrade +RUN apt-get install -y \ + postgresql-client \ + curl \ + unzip + +WORKDIR /layered +COPY data/config.json . 
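+# data/config.json mirrors the shape of a VCAP_SERVICES payload (s3 entries backed by
+# local minio, aws-rds entries pointing at the docker postgres containers), presumably
+# so cgov-util and sling can resolve credentials here the same way they do when deployed.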
+ +# Curl in `sling` +RUN curl -LO 'https://github.com/slingdata-io/sling-cli/releases/latest/download/sling_linux_amd64.tar.gz' \ + && tar xf sling_linux_amd64.tar.gz \ + && rm -f sling_linux_amd64.tar.gz \ + && chmod +x sling \ + && mv sling /bin/sling + +RUN curl -L -O https://github.com/GSA-TTS/fac-backup-utility/releases/download/v0.1.9/gov.gsa.fac.cgov-util-v0.1.9-linux-amd64.tar.gz \ + && tar xvzf gov.gsa.fac.cgov-util-v0.1.9-linux-amd64.tar.gz gov.gsa.fac.cgov-util \ + && chmod 755 gov.gsa.fac.cgov-util \ + && mv gov.gsa.fac.cgov-util /layered/cgov-util + +# Where do we need the config? Try home? +WORKDIR /root/.fac +COPY data/config.json . +ARG FAC_DB_URI="postgresql://postgres@db:5432/postgres?sslmode=disable" +ARG RUNNING_LOCAL=0 + +WORKDIR /app +ENTRYPOINT [ "./load_public_data_locally.sh" ] diff --git a/backend/util/load_public_dissem_data/Makefile b/backend/util/load_public_dissem_data/Makefile new file mode 100644 index 0000000000..77733d2fc7 --- /dev/null +++ b/backend/util/load_public_dissem_data/Makefile @@ -0,0 +1,10 @@ +NETWORK?=backend-db2-1 + +build: + docker build -t facloaddata --build-arg FAC_DB_URI=${FAC_DB_URI} . + +run: + docker run -i --rm -v ${PWD}:/app --network container:${NETWORK} -t facloaddata + +run_remote: + docker run -i --rm -v ${PWD}:/app facloaddata "${FAC_DB_URI}" diff --git a/backend/util/load_public_dissem_data/README.md b/backend/util/load_public_dissem_data/README.md new file mode 100644 index 0000000000..62a62f117b --- /dev/null +++ b/backend/util/load_public_dissem_data/README.md @@ -0,0 +1,77 @@ +# loading public data + +This provides a containerized data loading process that sets up your local FAC in a manner that duplicates the live/production app. + +The data we are using is public, historic data. It can be replaced, at a later point, with data that is more current. + +## Full clean + +You might want a completely clean local stack to start. It is not strictly necessary. If you get key conflicts, it means you already have some of this historic data loaded. + +### Wipe the stack + +From the backend folder + +``` +make -i docker-full-clean +``` + +Note the `-i` flag. This means `make` should ignore errors. You want it to, so it will keep going and wipe everything. + +``` +make docker-first-run +``` + +and then + +``` +docker compose up +``` + +We need the stack running for this whole process. + +## Grab the ZIP + +You'll need to grab + +https://drive.google.com/drive/folders/1gUsqD31Pkd17CruE4PWwwPKJVUssYNnI + +which is cleaned/historic public data, fit for our dissem_* tables. + +Compressed, it is 330MB. Uncompressed, around 3GB. + +Put it in dissemination/tools/load_public_dissem_data/data (a child of this directory) + +## Build the container + +This is containerized, so it should work on all platforms. To build the container, run + +``` +make build +``` + +Then, to run the container, + +``` +make run +``` + +You need to run this from the current directory. + +NOTE: The docker command in the Makefile uses the `--network` flag. The `--network` flag tells Docker to run this container on the same network as your currently running stack. It assumes that you did a `docker compose up` on the FAC stack, and that the web container has the default name of `backend-web-1`. If this does not work, you will need to... + +``` +make NETWORK= run +``` + +where `` is the name of your web container. This should allow this container to correctly talk to our databases. + +## When to rebuild this container + +Note this is pinned to v0.1.9 of the cgov-util. 
+ +https://github.com/GSA-TTS/fac-backup-utility + +If that gets updated, you'll need to update the dockerfile. + +It also copies in the YAML for sling from `dissemination/sql/sling`. If that changes, you'll want to diff --git a/backend/util/load_public_dissem_data/data/README.md b/backend/util/load_public_dissem_data/data/README.md new file mode 100644 index 0000000000..6019b70931 --- /dev/null +++ b/backend/util/load_public_dissem_data/data/README.md @@ -0,0 +1,3 @@ +The historic/public data dump ends up in this directory. + +The container will mount this directory in order to unzip and load the data into the local environment. diff --git a/backend/util/load_public_dissem_data/data/config.json b/backend/util/load_public_dissem_data/data/config.json new file mode 100644 index 0000000000..af567bf84d --- /dev/null +++ b/backend/util/load_public_dissem_data/data/config.json @@ -0,0 +1,118 @@ +{ + "s3": [ + { + "label": "s3", + "provider": "minio-local", + "plan": "basic", + "name": "fac-private-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDALPHA1", + "instance_name": "fac-private-s3", + "binding_guid": "UUIDALPHA2", + "binding_name": null, + "credentials": { + "uri": "http://minio:9000", + "port": 9000, + "insecure_skip_verify": false, + "access_key_id": "singleauditclearinghouse", + "secret_access_key": "singleauditclearinghouse", + "region": "us-east-1", + "bucket": "fac-private-s3", + "endpoint": "minio", + "fips_endpoint": "minio", + "additional_buckets": [] + }, + "syslog_drain_url": "https://ALPHA.drain.url", + "volume_mounts": [ + "no_mounts" + ] + }, + { + "label": "s3", + "provider": "minio-local", + "plan": "basic", + "name": "fac-public-s3", + "tags": [ + "AWS", + "S3", + "object-storage" + ], + "instance_guid": "UUIDALPHA1", + "instance_name": "fac-public-s3", + "binding_guid": "UUIDALPHA2", + "binding_name": null, + "credentials": { + "uri": "http://minio:9000", + "port": 9000, + "insecure_skip_verify": false, + "access_key_id": "singleauditclearinghouse", + "secret_access_key": "singleauditclearinghouse", + "region": "us-east-1", + "bucket": "fac-public-s3", + "endpoint": "minio", + "fips_endpoint": "minio", + "additional_buckets": [] + }, + "syslog_drain_url": "https://ALPHA.drain.url", + "volume_mounts": [ + "no_mounts" + ] + } + ], + "aws-rds": [ + { + "label": "fac-db", + "provider": null, + "plan": null, + "name": "fac-db", + "tags": [ + "database", + "docker" + ], + "instance_guid": "UUIDINDIA1", + "instance_name": "db", + "binding_guid": "UUIDINDIA2", + "binding_name": null, + "credentials": { + "db_name": "postgres", + "host": "db", + "name": "postgres", + "password": "", + "port": "5432", + "uri": "postgres://postgres@db:5432/postgres?sslmode=disable", + "username": "postgres" + }, + "syslog_drain_url": null, + "volume_mounts": [] + }, + { + "label": "fac-snapshot-db", + "provider": null, + "plan": null, + "name": "fac-snapshot-db", + "tags": [ + "database", + "docker" + ], + "instance_guid": "UUIDJULIET1", + "instance_name": "db", + "binding_guid": "UUIDJULIET2", + "binding_name": null, + "credentials": { + "db_name": "postgres", + "host": "db2", + "name": "postgres", + "password": "", + "port": "5432", + "uri": "postgres://postgres@db2:5432/postgres?sslmode=disable", + "username": "postgres" + }, + "syslog_drain_url": null, + "volume_mounts": [] + } + ] +} diff --git a/backend/util/load_public_dissem_data/load_public_data_locally.sh b/backend/util/load_public_dissem_data/load_public_data_locally.sh new file mode 100755 index 
0000000000..562e1496f7 --- /dev/null +++ b/backend/util/load_public_dissem_data/load_public_data_locally.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +export CGOV_UTIL_VERSION=v0.1.9 + +echo "FAC_DB_URI is [" $1 "]" + +if [ -z "$1" ] +then + export FAC_DB_URI="postgresql://postgres@db:5432/postgres?sslmode=disable" +else + export FAC_DB_URI="$1" + export RUNNING_LOCAL=1 +fi + +echo "FAC_DB_URI is now [" $FAC_DB_URI "] local [" $RUNNING_LOCAL "]" + +export FAC_SNAPSHOT_URI="postgresql://postgres@db2:5432/postgres?sslmode=disable" + + +function check_table_exists() { + local db_uri="$1" + local dbname="$2" + psql $db_uri -c "SELECT '$dbname'::regclass" >/dev/null 2>&1 + result=$? + return $result +} + +echo "This will unzip ~3.3GB of data, and load it into a local FAC." +echo "Make sure the FAC is running." +echo "Make sure you have disk space." +echo "Sleeping for 4 seconds..." +sleep 4 + +# First, we cleanup the local filesystem. +# This removes any temporary files from any +# previous data loads +if [ "$RUNNING_LOCAL" -eq "1" ]; then + export BASE_PATH="." +else + export BASE_PATH="/app" +fi + +rm -f $BASE_PATH/data/db_dissem_dump +rm -rf $BASE_PATH/data/__MACOSX + +# # Next, we drop the public_data schema. +# # This is because we want to make sure it is +# # regenerated fresh. +# psql $FAC_SNAPSHOT_URI -c "DROP SCHEMA IF EXISTS public_data_v1_0_0 CASCADE" + +# # Now, the schema for the public_data is +# # created. This provies a place for the tables to +# # land when we run sling +# psql $FAC_SNAPSHOT_URI -c "CREATE SCHEMA IF NOT EXISTS public_data_v1_0_0" +# psql $FAC_SNAPSHOT_URI -c "CREATE SEQUENCE IF NOT EXISTS public_data_v1_0_0.seq_combined START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE" + +# Unzip the compressed historical data dump. + +pushd $BASE_PATH/data +echo "Unzipping data." +unzip db_dissem_dump.zip +popd + +# Truncate the dissemination_* tables if they exist. +# CASCADE as well. This makes sure we don't duplicate data, +# which causes PK/FK problems. +echo "select 'TRUNCATE TABLE '||tablename||' CASCADE;' FROM pg_tables WHERE tablename LIKE 'dissemination_%'" | \ + psql $FAC_DB_URI -t | \ + psql $FAC_DB_URI + +# Load the large historic dataset. +psql $FAC_DB_URI -v ON_ERROR_STOP=on < $BASE_PATH/data/db_dissem_dump +result=$? +if [ $result -ne 0 ]; then + echo "Something went wrong." + exit -1 +else + echo "Loaded lots of data without error, apparently." +fi diff --git a/backend/util/nightly_api_refresh.sh b/backend/util/nightly_api_refresh.sh new file mode 100755 index 0000000000..d63ed05557 --- /dev/null +++ b/backend/util/nightly_api_refresh.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +source tools/util_startup.sh +source tools/setup_env.sh +source tools/sql_pre_post.sh +setup_env + +# Run an RDS backup, to refresh dissemination_* in FAC_SNAPSHOT from FAC_DB +#./../fac-backup-util.sh "v0.1.9" "rds_backup" +./fac-backup-util.sh "$1" "$2" +gonogo "fac-backup-util" + +# Run the pre/post. +sql_pre_fac_snapshot_db +gonogo "sql_pre_fac_snapshot_db" + +sql_post_fac_snapshot_db +gonogo "sql_post_fac_snapshot_db" + +# We might, at some point, +# consider running a vacuum on DB1 +# as part of a nightly or weekly job. +# Below is *representative* code. 
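+# (VACUUM cannot run inside a transaction block, so a real version of this would
+# need the non-transactional psql path; run_sql only takes that path for SQL files
+# with "notxn" in the name.)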
+# run_sql $FAC_DB_URI -c "VACUUM(FULL, ANALYZE)" diff --git a/backend/tools/test_update_program_data.py b/backend/util/test_update_program_data.py similarity index 100% rename from backend/tools/test_update_program_data.py rename to backend/util/test_update_program_data.py diff --git a/backend/tools/update_program_data.py b/backend/util/update_program_data.py similarity index 100% rename from backend/tools/update_program_data.py rename to backend/util/update_program_data.py diff --git a/docs/backups_and_restores.md b/docs/backups_and_restores.md index 9019cae010..720c82a343 100644 --- a/docs/backups_and_restores.md +++ b/docs/backups_and_restores.md @@ -34,7 +34,7 @@ Information regarding the fac-backup-utility can be found [at the repository](ht Database backups occur in the following ways: 1. An initial backup, where a backup has not been run in the target environment. This input of `initial_backup` is important, as when it does a the `db_to_db` command, it will not truncate the target table, as the table does not exist in the destination database. ```bash -./fac-backup-util.sh v0.1.5 initial_backup +./fac-backup-util.sh v0.1.9 initial_backup # Curl the utility # Install AWS # DB to S3 table dump (backups) @@ -44,7 +44,7 @@ Database backups occur in the following ways: 2. A deploy backup, where the `db_to_db` function is not called. This is a standard backup strategy before the application deploys, to ensure the s3 contents of the primary s3 are sync'd to the backups bucket, and a table dump is stored in the backups bucket. ```bash -./fac-backup-util.sh v0.1.5 deploy_backup +./fac-backup-util.sh v0.1.9 deploy_backup # Curl the utility # Install AWS # DB to S3 table dump (backups) @@ -53,7 +53,7 @@ Database backups occur in the following ways: 3. A scheduled backup is run every two hours, across each environment, ensuring that we have a clean backup in s3, rds, and the bucket contents are in sync. ```bash -./fac-backup-util.sh v0.1.5 scheduled_backup +./fac-backup-util.sh v0.1.9 scheduled_backup # Curl the utility # Install AWS # DB to S3 table dump (fac-db -> backups) @@ -66,7 +66,7 @@ Restoring from backups can be run via workflow, from designated individuals. The 1. S3 Restore takes a `operation-mm-DD-HH` input (ex `scheduled-06-04-10`), and is required for the backups to be restored. The utility looks in `s3://${bucket}/backups/operation-mm-DD-HH/` for its table dumps, and without supplying the target backups, it will not restore. Once it does a `--data-only` restoration, it will then sync the files from the backups bucket to the application bucket. We do this to ensure the contents of the application bucket are up to date, relative to the data in the database. We know that if we use the latest folder in `/backups/` then the contents of the s3 are the latest available, from the prior backup. ```bash -./fac-restore-util.sh v0.1.5 s3_restore scheduled-06-04-10 +./fac-restore-util.sh v0.1.9 s3_restore scheduled-06-04-10 # Curl the utility # Install AWS # DB to S3 table dump (backups -> fac-db) [Truncate target table before --data-only pg_restore] @@ -81,7 +81,7 @@ daily-mm-dd 2. Database to database restoration also can occur as well, using `psql` to dump the tables from the cold store database to the live database. 
```bash -./fac-restore-util.sh v0.1.5 db_restore +./fac-restore-util.sh v0.1.9 db_restore # Curl the utility # Install AWS # DB to DB table dump (fac-snapshot-db -> fac-db) [Truncate target table before dump] diff --git a/docs/user-management.md b/docs/user-management.md index c7d999da42..89b82654b5 100644 --- a/docs/user-management.md +++ b/docs/user-management.md @@ -6,7 +6,9 @@ The first time you login to the application, a user record in the FAC database w ## Admin users -To promote a user to either staff or superuser status, we'll use a Django management command. If you're running locally, the management command can be executed directly. +### Local development + +To acquire admin access in the local environment, you can execute a management command directly. ```bash # Run make_staff to promote a user to staff status @@ -16,23 +18,13 @@ python manage.py make_staff user@example.com python manage.py make_super user@example.com ``` -To run it against a deployed instance, we'll SSH into the app, [configure our session](https://cloud.gov/docs/management/using-ssh/#application-debugging-tips), and then run the command. - -```bash -# SSH to cloud.gov **instance** -cf ssh gsa-fac-dev +### Deployed instances -# Configure session per cloud.gov docs -/tmp/lifecycle/shell +To add/remove/promote a user in the admin interface, you would modify the [staffusers.json](../backend/config/staffusers.json) list and submit a PR for this change. -# Run make_staff to promote a user to staff status -python manage.py make_staff user@example.com - -# Run make_super to promote a user to superuser status -python manage.py make_super user@example.com -``` +Once the application starts up, it will adjust the user access to the admin site based on the updated list. If you are included in this list, you will be able to access the site via the `/admin` page. -Once your user is promoted, you'll be able to access the admin site via the `/admin` page. +**NOTE** - The email addresses included in this list MUST have a Login.gov account and have used it to log in to the application at least once. Otherwise, the application will ignore the email address on startup. ## Non-admin users diff --git a/terraform/shared/modules/env/postgrest.tf b/terraform/shared/modules/env/postgrest.tf index 61d4725ed0..78dffd6211 100644 --- a/terraform/shared/modules/env/postgrest.tf +++ b/terraform/shared/modules/env/postgrest.tf @@ -10,7 +10,7 @@ resource "cloudfoundry_route" "postgrest" { resource "cloudfoundry_service_key" "postgrest" { name = "postgrest" - service_instance = module.database.instance_id + service_instance = module.snapshot-database.instance_id } data "docker_registry_image" "postgrest" { @@ -32,7 +32,7 @@ resource "cloudfoundry_app" "postgrest" { environment = { PGRST_DB_URI : cloudfoundry_service_key.postgrest.credentials.uri - PGRST_DB_SCHEMAS : "api_v1_0_3,api_v1_1_0,admin_api_v1_1_0" + PGRST_DB_SCHEMAS : "api_v1_1_0,api_v2_0_0" PGRST_DB_ANON_ROLE : "anon" PGRST_JWT_SECRET : var.pgrst_jwt_secret PGRST_DB_MAX_ROWS : 20000
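+    # Clients pick one of the published schemas per request with the Accept-Profile
+    # header; the first schema in the list is the default. A minimal sketch, assuming
+    # a local PostgREST on :3000 and the API_KEY_ID / CYPRESS_API_GOV_JWT values used
+    # by util/api_perf_test/perf.py:
+    #   curl -H "accept-profile: api_v2_0_0" \
+    #        -H "authorization: bearer $CYPRESS_API_GOV_JWT" \
+    #        -H "x-api-user-id: $API_KEY_ID" \
+    #        "http://localhost:3000/general?report_id=eq.2021-12-CENSUS-0000250449"
+    # PGRST_DB_MAX_ROWS caps each response at 20,000 rows, which is why the perf script
+    # pages with limit=20000&offset=N.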