From 8689bf60365350ad8249a435d85c645d47c36429 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:30:42 -0500 Subject: [PATCH 01/32] Initial work to break out setup task work and define process arguments --- airflow/dags/cwl_dag_modular.py | 98 ++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index bd2eb466..107c54fc 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -21,6 +21,8 @@ from airflow.operators.python import PythonOperator, get_current_context from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s +import requests +import yaml from airflow import DAG @@ -28,6 +30,7 @@ UNITY_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-unity/stage-in-workflow.cwl" DAAC_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-daac/stage-in-workflow.cwl" LOCAL_DIR = "/shared-task-data" +DOWNLOAD_DIR = "input" # The path of the working directory where the CWL workflow is executed # (aka the starting directory for cwl-runner). @@ -140,25 +143,23 @@ ) -def setup(ti=None, **context): +def create_local_dir(dag_run_id): """ - Task that creates the working directory on the shared volume - and parses the input parameter values. + Create local directory for working DAG data. 
""" - context = get_current_context() - dag_run_id = context["dag_run"].run_id local_dir = f"{LOCAL_DIR}/{dag_run_id}" - logging.info(f"Creating directory: {local_dir}") os.makedirs(local_dir, exist_ok=True) logging.info(f"Created directory: {local_dir}") - # select the node pool based on what resources were requested + +def select_node_pool(ti, request_storage, request_memory, request_cpu): + """ + Select node pool based on resources requested in input parameters. + """ node_pool = unity_sps_utils.NODE_POOL_DEFAULT - storage = context["params"]["request_storage"] # 100Gi - storage = int(storage[0:-2]) # 100 - memory = context["params"]["request_memory"] # 32Gi - memory = int(memory[0:-2]) # 32 - cpu = int(context["params"]["request_cpu"]) # 8 + storage = int(request_storage[0:-2]) # 100Gi -> 100 + memory = int(request_memory[0:-2]) # 32Gi -> 32 + cpu = int(request_cpu) # 8 logging.info(f"Requesting storage={storage}Gi memory={memory}Gi CPU={cpu}") if (storage > 30) or (memory > 32) or (cpu > 8): @@ -166,18 +167,24 @@ def setup(ti=None, **context): logging.info(f"Selecting node pool={node_pool}") ti.xcom_push(key="node_pool_processing", value=node_pool) - # select "use_ecr" argument and determine if ECR login is required - logging.info("Use ECR: %s", context["params"]["use_ecr"]) - if context["params"]["use_ecr"]: + +def select_ecr(ti, use_ecr): + """ + Determine if ECR login is required. + """ + logging.info("Use ECR: %s", use_ecr) + if use_ecr: ecr_login = os.environ["AIRFLOW_VAR_ECR_URI"] ti.xcom_push(key="ecr_login", value=ecr_login) logging.info("ECR login: %s", ecr_login) - # define stage in arguments - stage_in_args = {"download_dir": "input", "stac_json": context["params"]["stac_json_url"]} - # select stage in workflow based on input location - if context["params"]["input_location"] == "daac": +def select_stage_in(ti, stac_json_url, input_location): + """ + Determine stage in workflow and required arguments. 
+ """ + stage_in_args = {"download_dir": DOWNLOAD_DIR, "stac_json": stac_json_url} + if input_location == "daac": stage_in_workflow = DAAC_STAGE_IN_WORKFLOW else: stage_in_workflow = UNITY_STAGE_IN_WORKFLOW @@ -198,6 +205,57 @@ def setup(ti=None, **context): logging.info("Stage in arguments selected: %s", stage_in_args) +def select_process(ti, dag_run_id, cwl_args): + """ + Determine process task CWL arguments. + """ + input_dir = f"{WORKING_DIR}/{DOWNLOAD_DIR}" + if cwl_args.endswith("yml") or cwl_args.endswith("yaml"): + yaml_data = requests.get(cwl_args, headers={"User-Agent":"SPS/Airflow"}).text + json_data = yaml.safe_load(yaml_data) + else: + json_data = json.loads(cwl_args) + json_data["input"] = { + "class": "Directory", + "path": input_dir + } + ti.xcom_push(key="cwl_args", value=json.dumps(json_data)) + logging.info("Modified CWL args for processing task.") + + +def setup(ti=None, **context): + """ + Task that creates the working directory on the shared volume + and parses the input parameter values. 
+ """ + context = get_current_context() + + # create local working directory + dag_run_id = context["dag_run"].run_id + create_local_dir(dag_run_id) + + # select the node pool based on what resources were requested + select_node_pool( + ti, + context["params"]["request_storage"], + context["params"]["request_memory"], + context["params"]["request_cpu"], + ) + + # select "use_ecr" argument and determine if ECR login is required + select_ecr(ti, context["params"]["use_ecr"]) + + # define stage in arguments + select_stage_in( + ti, + context["params"]["stac_json_url"], + context["params"]["input_location"], + ) + + # define process arguments + select_process(ti, dag_run_id, context["params"]["cwl_args"]) + + setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) @@ -260,7 +318,7 @@ def setup(ti=None, **context): "-w", "{{ params.cwl_workflow }}", "-j", - "{{ params.cwl_args }}", + "{{ ti.xcom_pull(task_ids='Setup', key='cwl_args') }}", "-e", "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", ], From 4ac5e6db1bd0feacfddff6805114af9c167236d4 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:13:18 -0500 Subject: [PATCH 02/32] Add HTTP download option --- airflow/dags/cwl_dag_modular.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 107c54fc..d408658a 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -29,6 +29,7 @@ # Task constants UNITY_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-unity/stage-in-workflow.cwl" DAAC_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-daac/stage-in-workflow.cwl" +HTTP_STAGE_IN_WORKFLOW = 
"https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_in_workflow.cwl" LOCAL_DIR = "/shared-task-data" DOWNLOAD_DIR = "input" @@ -135,7 +136,7 @@ "input_location": Param( DEFAULT_INPUT_LOCATION, type="string", - enum=["daac", "unity"], + enum=["daac", "unity", "http"], title="Input data location", description="Indicate whether input data should be retrieved from a DAAC or Unity", ), @@ -186,7 +187,7 @@ def select_stage_in(ti, stac_json_url, input_location): stage_in_args = {"download_dir": DOWNLOAD_DIR, "stac_json": stac_json_url} if input_location == "daac": stage_in_workflow = DAAC_STAGE_IN_WORKFLOW - else: + elif input_location == "unity": stage_in_workflow = UNITY_STAGE_IN_WORKFLOW ssm_client = boto3.client("ssm", region_name="us-west-2") ss_acct_num = ssm_client.get_parameter(Name=unity_sps_utils.SS_ACT_NUM, WithDecryption=True)[ @@ -197,6 +198,8 @@ def select_stage_in(ti, stac_json_url, input_location): WithDecryption=True, )["Parameter"]["Value"] stage_in_args["unity_client_id"] = unity_client_id + else: + stage_in_workflow = HTTP_STAGE_IN_WORKFLOW ti.xcom_push(key="stage_in_workflow", value=stage_in_workflow) logging.info("Stage In workflow selected: %s", stage_in_workflow) From 967636956cb5537af1f13655f67af5fef6392a5e Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:34:04 -0500 Subject: [PATCH 03/32] Fix linting and code format --- airflow/dags/cwl_dag_modular.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index d408658a..f812b757 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -15,14 +15,14 @@ from datetime import datetime import boto3 +import requests import unity_sps_utils +import yaml from airflow.models.baseoperator import chain from airflow.models.param import Param from airflow.operators.python 
import PythonOperator, get_current_context from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s -import requests -import yaml from airflow import DAG @@ -214,14 +214,11 @@ def select_process(ti, dag_run_id, cwl_args): """ input_dir = f"{WORKING_DIR}/{DOWNLOAD_DIR}" if cwl_args.endswith("yml") or cwl_args.endswith("yaml"): - yaml_data = requests.get(cwl_args, headers={"User-Agent":"SPS/Airflow"}).text + yaml_data = requests.get(cwl_args, headers={"User-Agent": "SPS/Airflow"}).text json_data = yaml.safe_load(yaml_data) else: json_data = json.loads(cwl_args) - json_data["input"] = { - "class": "Directory", - "path": input_dir - } + json_data["input"] = {"class": "Directory", "path": input_dir} ti.xcom_push(key="cwl_args", value=json.dumps(json_data)) logging.info("Modified CWL args for processing task.") From 8b7fd85b0fd1fd7cfaeea78da07caba2a9d809ad Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:15:12 -0500 Subject: [PATCH 04/32] Initial stage in, process, stage out entrypoint --- airflow/dags/cwl_dag_modular.py | 181 ++++++------------ airflow/docker/cwl/Dockerfile_modular | 26 +++ .../cwl/docker_cwl_entrypoint_modular.sh | 170 ++++++++++++++++ .../docker/cwl/docker_cwl_entrypoint_utils.py | 70 +++++++ airflow/plugins/unity_sps_utils.py | 4 +- 5 files changed, 328 insertions(+), 123 deletions(-) create mode 100644 airflow/docker/cwl/Dockerfile_modular create mode 100755 airflow/docker/cwl/docker_cwl_entrypoint_modular.sh create mode 100644 airflow/docker/cwl/docker_cwl_entrypoint_utils.py diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index f812b757..d8df4497 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -15,9 +15,7 @@ from datetime import datetime import boto3 -import requests import unity_sps_utils -import yaml from airflow.models.baseoperator import chain from airflow.models.param import Param 
from airflow.operators.python import PythonOperator, get_current_context @@ -27,11 +25,13 @@ from airflow import DAG # Task constants -UNITY_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-unity/stage-in-workflow.cwl" -DAAC_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-daac/stage-in-workflow.cwl" -HTTP_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_in_workflow.cwl" +STAGE_IN_WORKFLOW = ( + "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_in.cwl" +) +STAGE_OUT_WORKFLOW = ( + "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_out.cwl" +) LOCAL_DIR = "/shared-task-data" -DOWNLOAD_DIR = "input" # The path of the working directory where the CWL workflow is executed # (aka the starting directory for cwl-runner). 
@@ -43,9 +43,10 @@ "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/main/demos/echo_message.cwl" ) DEFAULT_CWL_ARGUMENTS = json.dumps({"message": "Hello Unity"}) -DEFAULT_STAC_JSON_URL = "https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/EMITL1BRAD_001/items?limit=2" -DEFAULT_INPUT_LOCATION = "daac" - +DEFAULT_STAGE_IN_ARGS = "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/test/ogc_app_package/stage_in.yml" +DEFAULT_STAGE_OUT_ARGS = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.yaml" +DEFAULT_STAGE_OUT_BUCKET = "unity-dev-unity-unity-data" +DEFAULT_COLLECTION_ID = "example-app-collection___3" # Alternative arguments to execute SBG Pre-Process # DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" @@ -71,13 +72,6 @@ # "ephemeral-storage": "30Gi" # }, ) -STAGE_IN_CONTAINER_RESOURCES = k8s.V1ResourceRequirements( - requests={ - "memory": "4Gi", - "cpu": "4", - "ephemeral-storage": "{{ params.request_storage }}", - } -) # Default DAG configuration dag_default_args = { @@ -127,18 +121,31 @@ title="Docker container storage", ), "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), - "stac_json_url": Param( - DEFAULT_STAC_JSON_URL, + "stage_in_args": Param( + DEFAULT_STAGE_IN_ARGS, + type="string", + title="Stage in workflow parameters", + description="The stage in job parameters encoded as a JSON string," + "or the URL of a JSON or YAML file", + ), + "stage_out_args": Param( + DEFAULT_STAGE_OUT_ARGS, + type="string", + title="Stage out workflow parameters", + description="The stage out job parameters encoded as a JSON string," + "or the URL of a JSON or YAML file", + ), + "stage_out_bucket": Param( + DEFAULT_STAGE_OUT_BUCKET, type="string", - title="STAC JSON URL", - description="The URL to the STAC JSON document", + 
title="Stage out S3 bucket", + description="S3 bucket to stage data out to", ), - "input_location": Param( - DEFAULT_INPUT_LOCATION, + "collection_id": Param( + DEFAULT_COLLECTION_ID, type="string", - enum=["daac", "unity", "http"], - title="Input data location", - description="Indicate whether input data should be retrieved from a DAAC or Unity", + title="Output collection identifier", + description="Collection identifier to use for output (processed) data", ), }, ) @@ -180,47 +187,19 @@ def select_ecr(ti, use_ecr): logging.info("ECR login: %s", ecr_login) -def select_stage_in(ti, stac_json_url, input_location): - """ - Determine stage in workflow and required arguments. - """ - stage_in_args = {"download_dir": DOWNLOAD_DIR, "stac_json": stac_json_url} - if input_location == "daac": - stage_in_workflow = DAAC_STAGE_IN_WORKFLOW - elif input_location == "unity": - stage_in_workflow = UNITY_STAGE_IN_WORKFLOW - ssm_client = boto3.client("ssm", region_name="us-west-2") - ss_acct_num = ssm_client.get_parameter(Name=unity_sps_utils.SS_ACT_NUM, WithDecryption=True)[ - "Parameter" - ]["Value"] - unity_client_id = ssm_client.get_parameter( - Name=f"arn:aws:ssm:us-west-2:{ss_acct_num}:parameter{unity_sps_utils.DS_CLIENT_ID_PARAM}", - WithDecryption=True, - )["Parameter"]["Value"] - stage_in_args["unity_client_id"] = unity_client_id - else: - stage_in_workflow = HTTP_STAGE_IN_WORKFLOW - - ti.xcom_push(key="stage_in_workflow", value=stage_in_workflow) - logging.info("Stage In workflow selected: %s", stage_in_workflow) +def select_stage_out(ti): + """Retrieve API key and account id from SSM parameter store.""" + ssm_client = boto3.client("ssm", region_name="us-west-2") - ti.xcom_push(key="stage_in_args", value=stage_in_args) - logging.info("Stage in arguments selected: %s", stage_in_args) + api_key = ssm_client.get_parameter( + Name=unity_sps_utils.SPS_CLOUDTAMER_API_KEY_PARAM, WithDecryption=True + )["Parameter"]["Value"] + ti.xcom_push(key="api_key", value=api_key) - -def 
select_process(ti, dag_run_id, cwl_args): - """ - Determine process task CWL arguments. - """ - input_dir = f"{WORKING_DIR}/{DOWNLOAD_DIR}" - if cwl_args.endswith("yml") or cwl_args.endswith("yaml"): - yaml_data = requests.get(cwl_args, headers={"User-Agent": "SPS/Airflow"}).text - json_data = yaml.safe_load(yaml_data) - else: - json_data = json.loads(cwl_args) - json_data["input"] = {"class": "Directory", "path": input_dir} - ti.xcom_push(key="cwl_args", value=json.dumps(json_data)) - logging.info("Modified CWL args for processing task.") + account_id = ssm_client.get_parameter( + Name=unity_sps_utils.SPS_CLOUDTAMER_ACCOUNT_ID, WithDecryption=True + )["Parameter"]["Value"] + ti.xcom_push(key="account_id", value=account_id) def setup(ti=None, **context): @@ -245,65 +224,13 @@ def setup(ti=None, **context): # select "use_ecr" argument and determine if ECR login is required select_ecr(ti, context["params"]["use_ecr"]) - # define stage in arguments - select_stage_in( - ti, - context["params"]["stac_json_url"], - context["params"]["input_location"], - ) - - # define process arguments - select_process(ti, dag_run_id, context["params"]["cwl_args"]) + # retrieve stage out aws api key and account id + select_stage_out(ti) setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) -cwl_task_stage_in = unity_sps_utils.SpsKubernetesPodOperator( - retries=0, - task_id="cwl_task_stage_in", - namespace=unity_sps_utils.POD_NAMESPACE, - name="cwl-task-pod", - image=unity_sps_utils.SPS_DOCKER_CWL_IMAGE, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=1800, - arguments=[ - "-w", - "{{ ti.xcom_pull(task_ids='Setup', key='stage_in_workflow') }}", - "-j", - "{{ ti.xcom_pull(task_ids='Setup', key='stage_in_args') }}", - "-e", - "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", - ], - container_security_context={"privileged": True}, - container_resources=STAGE_IN_CONTAINER_RESOURCES, - container_logs=True, - 
volume_mounts=[ - k8s.V1VolumeMount(name="workers-volume", mount_path=WORKING_DIR, sub_path="{{ dag_run.run_id }}") - ], - volumes=[ - k8s.V1Volume( - name="workers-volume", - persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name="airflow-kpo"), - ) - ], - dag=dag, - node_selector={"karpenter.sh/nodepool": unity_sps_utils.NODE_POOL_DEFAULT}, - labels={"app": unity_sps_utils.POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=unity_sps_utils.get_affinity( - capacity_type=["spot"], - # instance_type=["t3.2xlarge"], - anti_affinity_label=unity_sps_utils.POD_LABEL, - ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, -) - - cwl_task_processing = unity_sps_utils.SpsKubernetesPodOperator( retries=0, task_id="cwl_task_processing", @@ -315,12 +242,23 @@ def setup(ti=None, **context): get_logs=True, startup_timeout_seconds=1800, arguments=[ + "-i", + "{{ params.stage_in_args }}", + "-k", + STAGE_IN_WORKFLOW, "-w", "{{ params.cwl_workflow }}", "-j", - "{{ ti.xcom_pull(task_ids='Setup', key='cwl_args') }}", + "{{ params.cwl_args }}", "-e", "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", + "-c", + "{{ params.collection_id }}", + "-b", + "{{ params.stage_out_bucket }}", + "-a", + "{{ ti.xcom_pull(task_ids='Setup', key='api_key') }}" "-s", + "{{ ti.xcom_pull(task_ids='Setup', key='account_id') }}", ], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, @@ -371,6 +309,5 @@ def cleanup(**context): task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) -chain( - setup_task.as_setup(), cwl_task_stage_in, cwl_task_processing, cleanup_task.as_teardown(setups=setup_task) -) + +chain(setup_task.as_setup(), cwl_task_processing, cleanup_task.as_teardown(setups=setup_task)) diff --git a/airflow/docker/cwl/Dockerfile_modular b/airflow/docker/cwl/Dockerfile_modular new file mode 100644 index 
00000000..8c77d03f --- /dev/null +++ b/airflow/docker/cwl/Dockerfile_modular @@ -0,0 +1,26 @@ +# docker:dind Dockerfile: https://github.com/docker-library/docker/blob/master/Dockerfile-dind.template +# FROM docker:dind +FROM docker:25.0.3-dind + +# install Python +RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python +RUN apk add gcc musl-dev linux-headers python3-dev +RUN apk add --no-cache python3 py3-pip +RUN apk add vim + +# install CWL libraries +RUN mkdir /usr/share/cwl \ + && cd /usr/share/cwl \ + && python -m venv venv \ + && source venv/bin/activate \ + && pip install cwltool cwl-runner docker boto3 awscli pyyaml + +# install nodejs to parse Javascript in CWL files +RUN apk add --no-cache nodejs npm + +# script to execute a generic CWL workflow with arguments +COPY docker_cwl_entrypoint_modular.sh /usr/share/cwl/docker_cwl_entrypoint_modular.sh +COPY docker_cwl_entrypoint_utils.py /usr/share/cwl/docker_cwl_entrypoint_utils.py + +WORKDIR /usr/share/cwl +ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint_modular.sh"] diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh new file mode 100755 index 00000000..7b480be6 --- /dev/null +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -0,0 +1,170 @@ +#!/bin/sh +# Script to execute a CWL workflow that includes Docker containers +# The Docker engine is started before the CWL execution, and stopped afterwards. 
+# -w: the CWL workflow URL +# (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.cwl) +# -j: a) the CWL job parameters as a JSON formatted string +# (example: { "name": "John Doe" }) +# OR b) The URL of a YAML or JSON file containing the job parameters +# (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.dev.yml) +# -e: the ECR login URL where the AWS account ID and region are specific to the Airflow installation +# (example: .dkr.ecr..amazonaws.com) [optional] +# -o: path to an output JSON file that needs to be shared as Airflow "xcom" data [optional] +# -c: collection identifier for process task created collection and stage out task upload +# -b: stage out s3 bucket to upload processed data to +# -a: Cloudtamer API key with permissions to retrieve temporary AWS credentials +# -s: AWS account ID to retrieve credentials for + +# API credential retrieval +CLOUDTAMER_API_URL="https://login.mcp.nasa.gov/api/v3" +CLOUDTAMER_ROLE="mcp-tenantOperator" + +# Must be the same as the path of the Persistent Volume mounted by the Airflow KubernetesPodOperator +# that executes this script +WORKING_DIR="/scratch" + +get_job_args() { + local job_args=$1 + workflow=$2 + # switch between the 2 cases a) and b) for job_args + # remove arguments from previous tasks + if [ "$job_args" = "${job_args#{}" ] + then + # job_args does NOT start with '{' + job_args_file=$job_args + else + # job_args starts with '{' + echo "$job_args" > ./job_args_$workflow.json + job_args_file="./job_args_$workflow.json" + fi + echo $job_args_file +} + +get_aws_credentials() { + local cloudtamer_api_key=$1 + local aws_account_id=$2 + response=$( + curl -s \ + -XPOST \ + -H "accept: application/json" \ + -H "Authorization: Bearer ${cloudtamer_api_key}" \ + -H "Content-Type: application/json" \ + "${CLOUDTAMER_API_URL}/temporary-credentials" \ + -d "{\"account_number\": \"$aws_account_id\",\"iam_role_name\": \"$CLOUDTAMER_ROLE\"}" + ) + + access_key_id=$(echo 
$response | jq -r .data.access_key) + secret_access_key=$(echo $response | jq -r .data.secret_access_key) + session_token=$(echo $response | jq -r .data.session_token) + echo $access_key_id,$secret_access_key,$session_token +} + +set -ex +while getopts i:k:w:j:e:o:c:b:a:s: flag +do + case "${flag}" in + i) cwl_workflow_stage_in=${OPTARG};; + k) job_args_stage_in=${OPTARG};; + w) cwl_workflow_process=${OPTARG};; + j) job_args_process=${OPTARG};; + e) ecr_login=${OPTARG};; + o) json_output=${OPTARG};; + c) collection_id=${OPTARG};; + b) bucket=${OPTARG};; + a) api_key=${OPTARG};; + s) aws_account_id=${OPTARG};; + esac +done + +# create working directory if it doesn't exist +mkdir -p "$WORKING_DIR" +cd $WORKING_DIR + +# stage in job args +rm -f ./job_args_stage_in.json +job_args_stage_in="$(get_job_args "$job_args_stage_in" stage_in)" +echo JOB_ARGS_STAGE_IN $job_args_stage_in +echo "Executing the CWL workflow: $cwl_workflow_stage_in with json arguments: $job_args_stage_in and working directory: $WORKING_DIR" + +# process job args +rm -rf ./job_args_process.json +job_args_process="$(get_job_args "$job_args_process" process)" +echo "Executing the CWL workflow: $cwl_workflow_process with json arguments: $job_args_process and working directory: $WORKING_DIR" + +echo "JSON XCOM output: ${json_output}" + +# Start Docker engine +dockerd &> dockerd-logfile & + +# Wait until Docker engine is running +# Loop until 'docker version' exits with 0. +until docker version > /dev/null 2>&1 +do + sleep 1 +done + +# Activate Python virtual environments for executables +. /usr/share/cwl/venv/bin/activate + +# Log into AWS ECR repository +if [ "$ecr_login" != "None" ]; then +IFS=. 
read account_id dkr ecr aws_region amazonaws com < Date: Tue, 26 Nov 2024 13:25:26 -0500 Subject: [PATCH 05/32] Re-organize input parameters and fix command line arguments to entrypoint --- airflow/dags/cwl_dag_modular.py | 102 ++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index d8df4497..6ce40e8f 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -3,8 +3,12 @@ The Airflow KubernetesPodOperator starts a Docker container that includes the Docker engine and the CWL libraries. The "cwl-runner" tool is invoked to execute the CWL workflow. -Parameter cwl_workflow: the URL of the CWL workflow to execute. -Parameter args_as_json: JSON string contained the specific values for the workflow specific inputs. +Parameter stage_in_args: The stage in job parameters encoded as a JSON string +Parameter process_workflow: the URL of the CWL workflow to execute. +Parameter process_args: JSON string contained the specific values for the processing workflow specific inputs. +Parameter stage_out_args: The stage out job parameters encoded as a JSON string or URL of JSON/YAML file. +Parameter stage_out_bucket: The S3 bucket to stage data out to. +Parameter collection_id: The output collection identifier for processed data. 
""" import json @@ -39,22 +43,22 @@ WORKING_DIR = "/scratch" # Default parameters -DEFAULT_CWL_WORKFLOW = ( - "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/main/demos/echo_message.cwl" +DEFAULT_PROCESS_WORKFLOW = ( + "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/process.cwl" ) -DEFAULT_CWL_ARGUMENTS = json.dumps({"message": "Hello Unity"}) +DEFAULT_PROCESS_ARGS = json.dumps({"example_argument_empty": ""}) DEFAULT_STAGE_IN_ARGS = "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/test/ogc_app_package/stage_in.yml" DEFAULT_STAGE_OUT_ARGS = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.yaml" DEFAULT_STAGE_OUT_BUCKET = "unity-dev-unity-unity-data" DEFAULT_COLLECTION_ID = "example-app-collection___3" # Alternative arguments to execute SBG Pre-Process -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" -# DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" +# DEFAULT_PROCESS_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" +# DEFAULT_PROCESS_ARGS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" # Alternative arguments to execute SBG end-to-end -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.cwl" -# DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.dev.yml" +# DEFAULT_PROCESS_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.cwl" +# DEFAULT_PROCESS_ARGS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.dev.yml" # Alternative arguments to execute SBG 
end-to-end # unity_sps_sbg_debug.txt @@ -93,47 +97,31 @@ max_active_tasks=30, default_args=dag_default_args, params={ - "cwl_workflow": Param( - DEFAULT_CWL_WORKFLOW, type="string", title="CWL workflow", description="The CWL workflow URL" - ), - "cwl_args": Param( - DEFAULT_CWL_ARGUMENTS, - type="string", - title="CWL workflow parameters", - description=("The job parameters encoded as a JSON string," "or the URL of a JSON or YAML file"), - ), - "request_memory": Param( - "4Gi", - type="string", - enum=["4Gi", "8Gi", "16Gi", "32Gi", "64Gi", "128Gi", "256Gi"], - title="Docker container memory", - ), - "request_cpu": Param( - "4", + "stage_in_args": Param( + DEFAULT_STAGE_IN_ARGS, type="string", - enum=["2", "4", "8", "16", "32"], - title="Docker container CPU", + title="Stage in workflow parameters", + description="The stage in job parameters encoded as a JSON string or the URL of a JSON or YAML file", ), - "request_storage": Param( - "10Gi", + "process_workflow": Param( + DEFAULT_PROCESS_WORKFLOW, type="string", - enum=["10Gi", "50Gi", "100Gi", "150Gi", "200Gi", "250Gi"], - title="Docker container storage", + title="Processing workflow", + description="The processing workflow URL", ), - "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), - "stage_in_args": Param( - DEFAULT_STAGE_IN_ARGS, + "process_args": Param( + DEFAULT_PROCESS_ARGS, type="string", - title="Stage in workflow parameters", - description="The stage in job parameters encoded as a JSON string," - "or the URL of a JSON or YAML file", + title="Processing workflow parameters", + description=( + "The processing job parameters encoded as a JSON string," "or the URL of a JSON or YAML file" + ), ), "stage_out_args": Param( DEFAULT_STAGE_OUT_ARGS, type="string", title="Stage out workflow parameters", - description="The stage out job parameters encoded as a JSON string," - "or the URL of a JSON or YAML file", + description="The stage out job parameters encoded as a 
JSON string, or the URL of a JSON or YAML file", ), "stage_out_bucket": Param( DEFAULT_STAGE_OUT_BUCKET, @@ -147,6 +135,25 @@ title="Output collection identifier", description="Collection identifier to use for output (processed) data", ), + "request_memory": Param( + "4Gi", + type="string", + enum=["4Gi", "8Gi", "16Gi", "32Gi", "64Gi", "128Gi", "256Gi"], + title="Docker container memory", + ), + "request_cpu": Param( + "4", + type="string", + enum=["2", "4", "8", "16", "32"], + title="Docker container CPU", + ), + "request_storage": Param( + "10Gi", + type="string", + enum=["10Gi", "50Gi", "100Gi", "150Gi", "200Gi", "250Gi"], + title="Docker container storage", + ), + "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), }, ) @@ -194,11 +201,13 @@ def select_stage_out(ti): api_key = ssm_client.get_parameter( Name=unity_sps_utils.SPS_CLOUDTAMER_API_KEY_PARAM, WithDecryption=True )["Parameter"]["Value"] + logging.info("Retrieved Cloudtamer API key.") ti.xcom_push(key="api_key", value=api_key) account_id = ssm_client.get_parameter( Name=unity_sps_utils.SPS_CLOUDTAMER_ACCOUNT_ID, WithDecryption=True )["Parameter"]["Value"] + logging.info("Retrieved AWS account identifier.") ti.xcom_push(key="account_id", value=account_id) @@ -243,13 +252,15 @@ def setup(ti=None, **context): startup_timeout_seconds=1800, arguments=[ "-i", - "{{ params.stage_in_args }}", - "-k", STAGE_IN_WORKFLOW, + "-k", + "{{ params.stage_in_args }}", "-w", - "{{ params.cwl_workflow }}", + "{{ params.process_workflow }}", "-j", - "{{ params.cwl_args }}", + "{{ params.process_args }}", + "-f", + STAGE_OUT_WORKFLOW, "-e", "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", "-c", @@ -257,7 +268,8 @@ def setup(ti=None, **context): "-b", "{{ params.stage_out_bucket }}", "-a", - "{{ ti.xcom_pull(task_ids='Setup', key='api_key') }}" "-s", + "{{ ti.xcom_pull(task_ids='Setup', key='api_key') }}", + "-s", "{{ ti.xcom_pull(task_ids='Setup', key='account_id') }}", ], 
container_security_context={"privileged": True}, From 8c119a53bce29bd7542274938d53c80e5a499226 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:26:17 -0500 Subject: [PATCH 06/32] Fix formatting and references to CWL workflow files --- .../cwl/docker_cwl_entrypoint_modular.sh | 9 ++-- .../docker/cwl/docker_cwl_entrypoint_utils.py | 41 +++++++------------ 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index 7b480be6..5e371599 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -60,13 +60,14 @@ get_aws_credentials() { } set -ex -while getopts i:k:w:j:e:o:c:b:a:s: flag +while getopts i:k:w:j:e:o:c:b:a:s:f: flag do case "${flag}" in i) cwl_workflow_stage_in=${OPTARG};; k) job_args_stage_in=${OPTARG};; w) cwl_workflow_process=${OPTARG};; j) job_args_process=${OPTARG};; + f) cwl_workflow_stage_out=${OPTARG};; e) ecr_login=${OPTARG};; o) json_output=${OPTARG};; c) collection_id=${OPTARG};; @@ -116,7 +117,7 @@ echo "Logged into: $ecr_login" fi # Stage in operations -stage_in=$(cwltool --outdir stage_in --copy-output stage_in.cwl test/ogc_app_package/stage_in.yml) +stage_in=$(cwltool --outdir $cwl_workflow_stage_in --copy-output stage_in.cwl test/ogc_app_package/stage_in.yml) # Get directory that contains downloads stage_in_dir=$(echo $stage_in | jq '.stage_in_download_dir.basename') @@ -133,7 +134,7 @@ echo "Editing process $job_args_process" ./entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id # Process operations -process=$(cwltool process.cwl $job_args_process) +process=$(cwltool $cwl_workflow_process $job_args_process) # Get directory that contains processed files process_dir=$(echo $process | jq '.output.basename') @@ -146,7 +147,7 @@ credentials="$(get_aws_credentials "$api_key" 
"$aws_account_id")" aws_key="$(cut -d ',' -f 1 <<< $credentials)" aws_secret="$(cut -d ',' -f 2 <<< $credentials)" aws_token="$(cut -d ',' -f 3 <<< $credentials)" -stage_out=$(cwltool stage_out.cwl \ +stage_out=$(cwltool $cwl_workflow_stage_out \ --output_dir $process_dir \ --staging_bucket $bucket \ --collection_id $collection_id \ diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_utils.py b/airflow/docker/cwl/docker_cwl_entrypoint_utils.py index fff25116..d8b28437 100644 --- a/airflow/docker/cwl/docker_cwl_entrypoint_utils.py +++ b/airflow/docker/cwl/docker_cwl_entrypoint_utils.py @@ -2,33 +2,20 @@ import argparse import json + import yaml def create_args(): """Create and return argparser.""" - arg_parser = argparse.ArgumentParser(description='Retrieve entrypoint utilities arguments') - arg_parser.add_argument('-c', - '--catalogjson', - type=str, - default='', - help='Path to catalog JSON file') - arg_parser.add_argument('-j', - '--jobargs', - type=str, - default='', - help='Process CWL job argument file') - arg_parser.add_argument('-i', - '--processinput', - type=str, - default='', - help='Process input directory') - arg_parser.add_argument('-d', - '--collectionid', - type=str, - default='', - help='Process and stage out collection identifier') + arg_parser = argparse.ArgumentParser(description="Retrieve entrypoint utilities arguments") + arg_parser.add_argument("-c", "--catalogjson", type=str, default="", help="Path to catalog JSON file") + arg_parser.add_argument("-j", "--jobargs", type=str, default="", help="Process CWL job argument file") + arg_parser.add_argument("-i", "--processinput", type=str, default="", help="Process input directory") + arg_parser.add_argument( + "-d", "--collectionid", type=str, default="", help="Process and stage out collection identifier" + ) return arg_parser @@ -38,11 +25,11 @@ def update_catalog_json(catalog_json): with open(catalog_json) as jf: catalog_data = json.load(jf) - for link in catalog_data['links']: - if 
link['rel'] == 'root': - link['href'] = 'catalog.json' + for link in catalog_data["links"]: + if link["rel"] == "root": + link["href"] = "catalog.json" - with open(catalog_json, 'w') as jf: + with open(catalog_json, "w") as jf: json.dump(catalog_data, jf, indent=2) @@ -50,13 +37,13 @@ def update_process_job_args(job_args, process_input, collection_id): """Update job arguments with input directory.""" with open(job_args) as fh: - if job_args.endswith('yaml') or job_args.endswith('yml'): + if job_args.endswith("yaml") or job_args.endswith("yml"): json_data = yaml.safe_load(fh) else: json_data = json.load(fh) json_data["input"] = {"class": "Directory", "path": process_input} json_data["output_collection"] = collection_id - with open(job_args, 'w') as jf: + with open(job_args, "w") as jf: json.dump(json_data, jf) From 4dd974b67ed2cdda88f852ea5d80419f6a47c3a6 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:38:34 -0500 Subject: [PATCH 07/32] Fix stage in workflow reference --- airflow/docker/cwl/docker_cwl_entrypoint_modular.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index 5e371599..055bc79d 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -117,7 +117,7 @@ echo "Logged into: $ecr_login" fi # Stage in operations -stage_in=$(cwltool --outdir $cwl_workflow_stage_in --copy-output stage_in.cwl test/ogc_app_package/stage_in.yml) +stage_in=$(cwltool --outdir stage_in --copy-output $cwl_workflow_stage_in $job_args_stage_in) # Get directory that contains downloads stage_in_dir=$(echo $stage_in | jq '.stage_in_download_dir.basename') From 9280d7417386ba247ac3a9d34575d8e79a1f8dbd Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:57:04 -0500 Subject: [PATCH 08/32] 
Fix reference to python utilities and install jq --- airflow/docker/cwl/Dockerfile_modular | 2 +- airflow/docker/cwl/docker_cwl_entrypoint_modular.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/docker/cwl/Dockerfile_modular b/airflow/docker/cwl/Dockerfile_modular index 8c77d03f..af61f25b 100644 --- a/airflow/docker/cwl/Dockerfile_modular +++ b/airflow/docker/cwl/Dockerfile_modular @@ -4,7 +4,7 @@ FROM docker:25.0.3-dind # install Python RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python -RUN apk add gcc musl-dev linux-headers python3-dev +RUN apk add gcc musl-dev linux-headers python3-dev jq RUN apk add --no-cache python3 py3-pip RUN apk add vim diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index 055bc79d..9f68c7c2 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -127,11 +127,11 @@ ls -l $stage_in_dir/ # Remove extraneous directory in front of catalog.json echo "Editing stage in catalog.json" -./entrypoint_utils.py -c "$stage_in_dir/catalog.json" +./docker_cwl_entrypoint_utils.py -c "$stage_in_dir/catalog.json" # Add input directory and output collection into process job arguments echo "Editing process $job_args_process" -./entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id +./docker_cwl_entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id # Process operations process=$(cwltool $cwl_workflow_process $job_args_process) From 1e3524d7d77eb815dde542c9eba6c42da82861ac Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:23:38 -0500 Subject: [PATCH 09/32] Pull stage out AWS credentials from SSM parameters --- airflow/dags/cwl_dag_modular.py | 30 ++++++++++------ .../cwl/docker_cwl_entrypoint_modular.sh | 34 ++++--------------- airflow/plugins/unity_sps_utils.py | 5 
+-- 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 6ce40e8f..0c68d6f7 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -33,7 +33,7 @@ "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_in.cwl" ) STAGE_OUT_WORKFLOW = ( - "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_out.cwl" + "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.cwl" ) LOCAL_DIR = "/shared-task-data" @@ -198,17 +198,23 @@ def select_stage_out(ti): """Retrieve API key and account id from SSM parameter store.""" ssm_client = boto3.client("ssm", region_name="us-west-2") - api_key = ssm_client.get_parameter( - Name=unity_sps_utils.SPS_CLOUDTAMER_API_KEY_PARAM, WithDecryption=True + aws_key = ssm_client.get_parameter( + Name=unity_sps_utils.DS_STAGE_OUT_AWS_KEY, WithDecryption=True )["Parameter"]["Value"] - logging.info("Retrieved Cloudtamer API key.") - ti.xcom_push(key="api_key", value=api_key) + logging.info("Retrieved stage out AWS access key.") + ti.xcom_push(key="aws_key", value=aws_key) - account_id = ssm_client.get_parameter( - Name=unity_sps_utils.SPS_CLOUDTAMER_ACCOUNT_ID, WithDecryption=True + aws_secret = ssm_client.get_parameter( + Name=unity_sps_utils.DS_STAGE_OUT_AWS_SECRET, WithDecryption=True )["Parameter"]["Value"] - logging.info("Retrieved AWS account identifier.") - ti.xcom_push(key="account_id", value=account_id) + logging.info("Retrieved stage out AWS access secret.") + ti.xcom_push(key="aws_secret", value=aws_secret) + + aws_token = ssm_client.get_parameter( + Name=unity_sps_utils.DS_STAGE_OUT_AWS_TOKEN, WithDecryption=True + )["Parameter"]["Value"] + logging.info("Retrieved stage out AWS access token.") + ti.xcom_push(key="aws_token", value=aws_token) def setup(ti=None, **context): @@ -268,9 
+274,11 @@ def setup(ti=None, **context): "-b", "{{ params.stage_out_bucket }}", "-a", - "{{ ti.xcom_pull(task_ids='Setup', key='api_key') }}", + "{{ ti.xcom_pull(task_ids='Setup', key='aws_key') }}", "-s", - "{{ ti.xcom_pull(task_ids='Setup', key='account_id') }}", + "{{ ti.xcom_pull(task_ids='Setup', key='aws_secret') }}", + "-t", + "{{ ti.xcom_pull(task_ids='Setup', key='aws_token') }}" ], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index 9f68c7c2..a14013e5 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -40,27 +40,8 @@ get_job_args() { echo $job_args_file } -get_aws_credentials() { - local cloudtamer_api_key=$1 - local aws_account_id=$2 - response=$( - curl -s \ - -XPOST \ - -H "accept: application/json" \ - -H "Authorization: Bearer ${cloudtamer_api_key}" \ - -H "Content-Type: application/json" \ - "${CLOUDTAMER_API_URL}/temporary-credentials" \ - -d "{\"account_number\": \"$aws_account_id\",\"iam_role_name\": \"$CLOUDTAMER_ROLE\"}" - ) - - access_key_id=$(echo $response | jq -r .data.access_key) - secret_access_key=$(echo $response | jq -r .data.secret_access_key) - session_token=$(echo $response | jq -r .data.session_token) - echo $access_key_id,$secret_access_key,$session_token -} - set -ex -while getopts i:k:w:j:e:o:c:b:a:s:f: flag +while getopts i:k:w:j:e:o:c:b:a:s:f:t: flag do case "${flag}" in i) cwl_workflow_stage_in=${OPTARG};; @@ -72,8 +53,9 @@ do o) json_output=${OPTARG};; c) collection_id=${OPTARG};; b) bucket=${OPTARG};; - a) api_key=${OPTARG};; - s) aws_account_id=${OPTARG};; + a) aws_key=${OPTARG};; + s) aws_secret=${OPTARG};; + t) aws_token=${OPTARG};; esac done @@ -127,11 +109,11 @@ ls -l $stage_in_dir/ # Remove extraneous directory in front of catalog.json echo "Editing stage in catalog.json" 
-./docker_cwl_entrypoint_utils.py -c "$stage_in_dir/catalog.json" +/usr/share/cwl/docker_cwl_entrypoint_utils.py -c "$stage_in_dir/catalog.json" # Add input directory and output collection into process job arguments echo "Editing process $job_args_process" -./docker_cwl_entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id +/usr/share/cwl/docker_cwl_entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id # Process operations process=$(cwltool $cwl_workflow_process $job_args_process) @@ -143,10 +125,6 @@ echo "Process output directory: $process_dir" ls -l $process_dir # Stage out operations -credentials="$(get_aws_credentials "$api_key" "$aws_account_id")" -aws_key="$(cut -d ',' -f 1 <<< $credentials)" -aws_secret="$(cut -d ',' -f 2 <<< $credentials)" -aws_token="$(cut -d ',' -f 3 <<< $credentials)" stage_out=$(cwltool $cwl_workflow_stage_out \ --output_dir $process_dir \ --staging_bucket $bucket \ diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py index 5726c4a6..030ca296 100644 --- a/airflow/plugins/unity_sps_utils.py +++ b/airflow/plugins/unity_sps_utils.py @@ -15,8 +15,9 @@ DS_CLIENT_ID_PARAM = "/unity/shared-services/cognito/hysds-ui-client-id" SS_ACT_NUM = "/unity/shared-services/aws/account" -SPS_CLOUDTAMER_API_KEY_PARAM = "/unity-nikki-1/dev/sps/processing/cloudtamer-api-key" -SPS_CLOUDTAMER_ACCOUNT_ID = "/unity-nikki-1/dev/sps/processing/cloudtamer-account-id" +DS_STAGE_OUT_AWS_KEY = "/unity-nikki-1/dev/sps/processing/aws-key" +DS_STAGE_OUT_AWS_SECRET = "/unity-nikki-1/dev/sps/processing/aws-secret" +DS_STAGE_OUT_AWS_TOKEN = "/unity-nikki-1/dev/sps/processing/aws-token" class SpsKubernetesPodOperator(KubernetesPodOperator): From f804def1da89af3ef26d373a8aa28a15f453be7f Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:24:02 -0500 Subject: [PATCH 10/32] Set entrypoint utils script to executable --- 
airflow/docker/cwl/Dockerfile_modular | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/docker/cwl/Dockerfile_modular b/airflow/docker/cwl/Dockerfile_modular index af61f25b..6cca6389 100644 --- a/airflow/docker/cwl/Dockerfile_modular +++ b/airflow/docker/cwl/Dockerfile_modular @@ -21,6 +21,7 @@ RUN apk add --no-cache nodejs npm # script to execute a generic CWL workflow with arguments COPY docker_cwl_entrypoint_modular.sh /usr/share/cwl/docker_cwl_entrypoint_modular.sh COPY docker_cwl_entrypoint_utils.py /usr/share/cwl/docker_cwl_entrypoint_utils.py +RUN chmod +x /usr/share/cwl/docker_cwl_entrypoint_utils.py WORKDIR /usr/share/cwl ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint_modular.sh"] From 97e7a52b7003ca4cfdd99802713440414b15687a Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:13:18 -0500 Subject: [PATCH 11/32] Build separate CWL DAG modular container image --- .github/workflows/build_docker_images.yml | 24 +++++++++++++++++++++++ airflow/plugins/unity_sps_utils.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 428391b6..6986a2ee 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -13,6 +13,7 @@ env: TAG: ${{ github.event.inputs.tag }} SPS_AIRFLOW: ${{ github.repository }}/sps-airflow SPS_DOCKER_CWL: ${{ github.repository }}/sps-docker-cwl + SPS_DOCKER_CWL_MODULAR: ${{ github.repository }}/sps-docker-cwl-modular jobs: build-sps-airflow: @@ -61,3 +62,26 @@ jobs: push: true tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}:${{ env.TAG }} labels: ${{ steps.metascheduler.outputs.labels }} + build-sps-docker-cwl-modular: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + 
password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for SPS Docker CWL modular image + id: metascheduler + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL_MODULAR }} + - name: Build and push SPS Docker CWL modular image + uses: docker/build-push-action@v5 + with: + context: ./airflow/docker/cwl + file: airflow/docker/cwl/Dockerfile_modular + push: true + tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL_MODULAR }}:${{ env.TAG }} + labels: ${{ steps.metascheduler.outputs.labels }} diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py index 030ca296..39563b03 100644 --- a/airflow/plugins/unity_sps_utils.py +++ b/airflow/plugins/unity_sps_utils.py @@ -8,7 +8,7 @@ # Shared constants POD_NAMESPACE = "sps" # The Kubernetes namespace within which the Pod is run (it must already exist) POD_LABEL = "cwl_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:220-stage-in-task" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl-modular:2.3.0" NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" From 2b6a01a7d3990fd0d387e62a666e3234d0c3b266 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:40:32 -0500 Subject: [PATCH 12/32] Clean up variables and stage out input parameter --- airflow/dags/cwl_dag_modular.py | 7 ------- airflow/docker/cwl/docker_cwl_entrypoint_modular.sh | 4 ---- 2 files changed, 11 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 1ec0d03c..a828b2eb 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -6,7 +6,6 @@ Parameter stage_in_args: The stage in job parameters encoded as a JSON string Parameter process_workflow: the URL of the CWL workflow to execute.
Parameter process_args: JSON string contained the specific values for the processing workflow specific inputs. -Parameter stage_out_args: The stage out job parameters encoded as a JSON string or URL of JSON/YAML file. Parameter stage_out_bucket: The S3 bucket to stage data out to. Parameter collection_id: The output collection identifier for processed data. """ @@ -117,12 +116,6 @@ "The processing job parameters encoded as a JSON string," "or the URL of a JSON or YAML file" ), ), - "stage_out_args": Param( - DEFAULT_STAGE_OUT_ARGS, - type="string", - title="Stage out workflow parameters", - description="The stage out job parameters encoded as a JSON string, or the URL of a JSON or YAML file", - ), "stage_out_bucket": Param( DEFAULT_STAGE_OUT_BUCKET, type="string", diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index a14013e5..e474081c 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -15,10 +15,6 @@ # -a: Cloudtamer API key with permissions to retrieve temporary AWS credentials # -s: AWS account ID to retrieve credentials for -# API credential retrieval -CLOUDTAMER_API_URL="https://login.mcp.nasa.gov/api/v3" -CLOUDTAMER_ROLE="mcp-tenantOperator" - # Must be the same as the path of the Persistent Volume mounted by the Airflow KubernetesPodOperator # that executes this script WORKING_DIR="/scratch" From 7cef1017a45e3fb4dd43ad630168635dcb1c669e Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Wed, 16 Oct 2024 21:42:24 -0700 Subject: [PATCH 13/32] attempt at fixing airflow URL munging --- airflow/helm/values.tmpl.yaml | 2 ++ terraform-unity/modules/terraform-unity-sps-airflow/main.tf | 2 ++ .../modules/terraform-unity-sps-ogc-processes-api/main.tf | 2 ++ 3 files changed, 6 insertions(+) diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 449b57a2..d87fa1f9 100644 --- 
a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -250,6 +250,8 @@ config: encrypt_s3_logs: false celery: worker_concurrency: 16 + webserver: + enable_proxy_fix: 'True' dags: persistence: diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index b9acf1b5..8655e799 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -638,6 +638,8 @@ resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" ProxyPreserveHost On FallbackResource /management/index.html + RequestHeader setifempty "X-Forwarded-Proto" "http" + RequestHeader setifempty "X-Forwarded-Port" expr=%%{SERVER_PORT} AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/sps/$1|q" diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 7803b985..76a91577 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -390,6 +390,8 @@ resource "aws_ssm_parameter" "unity_proxy_ogc_api" { ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1" ProxyPreserveHost On FallbackResource /management/index.html + RequestHeader setifempty "X-Forwarded-Proto" "http" + RequestHeader setifempty "X-Forwarded-Port" expr=%%{SERVER_PORT} AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/ogc/$1|q" From 9e093b78e2ec284192762a1e7c1b34446fc2216d Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Wed, 16 
Oct 2024 23:13:52 -0700 Subject: [PATCH 14/32] yanking requestheader directives out, it turns out the issue was higher up --- terraform-unity/modules/terraform-unity-sps-airflow/main.tf | 2 -- .../modules/terraform-unity-sps-ogc-processes-api/main.tf | 2 -- 2 files changed, 4 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index 8655e799..b9acf1b5 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -638,8 +638,6 @@ resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" ProxyPreserveHost On FallbackResource /management/index.html - RequestHeader setifempty "X-Forwarded-Proto" "http" - RequestHeader setifempty "X-Forwarded-Port" expr=%%{SERVER_PORT} AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/sps/$1|q" diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 76a91577..7803b985 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -390,8 +390,6 @@ resource "aws_ssm_parameter" "unity_proxy_ogc_api" { ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1" ProxyPreserveHost On FallbackResource /management/index.html - RequestHeader setifempty "X-Forwarded-Proto" "http" - RequestHeader setifempty "X-Forwarded-Port" expr=%%{SERVER_PORT} AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html Substitute "s|\"/([^\"]*)|\"/${var.project}/${var.venue}/ogc/$1|q" From 
0682fa259cea2f9346e48302de9015cdc7acdb2b Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Sun, 1 Dec 2024 19:15:23 -0800 Subject: [PATCH 15/32] adding in proxy retry options --- terraform-unity/modules/terraform-unity-sps-airflow/README.md | 1 + terraform-unity/modules/terraform-unity-sps-airflow/main.tf | 2 +- .../modules/terraform-unity-sps-ogc-processes-api/main.tf | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/README.md b/terraform-unity/modules/terraform-unity-sps-airflow/README.md index f8db6278..bab93ee0 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/README.md +++ b/terraform-unity/modules/terraform-unity-sps-airflow/README.md @@ -69,6 +69,7 @@ No modules. | [kubernetes_storage_class.efs](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/storage_class) | resource | | [null_resource.remove_keda_finalizers](https://registry.terraform.io/providers/hashicorp/null/3.2.3/docs/resources/resource) | resource | | [random_id.airflow_webserver_secret](https://registry.terraform.io/providers/hashicorp/random/3.6.1/docs/resources/id) | resource | +| [time_sleep.wait_after_ssm](https://registry.terraform.io/providers/hashicorp/time/0.12.1/docs/resources/sleep) | resource | | [time_sleep.wait_for_efs_mount_target_dns_propagation](https://registry.terraform.io/providers/hashicorp/time/0.12.1/docs/resources/sleep) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/caller_identity) | data source | | [aws_db_instance.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/db_instance) | data source | diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index b9acf1b5..a9650e69 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ 
b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -635,7 +635,7 @@ resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { Redirect "/${var.project}/${var.venue}/sps/home" - ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" + ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" retry=5 disablereuse=On ProxyPreserveHost On FallbackResource /management/index.html AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 7803b985..3ca2992c 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -387,7 +387,7 @@ resource "aws_ssm_parameter" "unity_proxy_ogc_api" { ProxyPassReverse "/" - ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1" + ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1" retry=5 disablereuse=On ProxyPreserveHost On FallbackResource /management/index.html AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html From a884f7d8ca1cfbc51156f7b7bd9c10c4251c57d6 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Mon, 2 Dec 2024 09:59:50 -0800 Subject: [PATCH 16/32] fixing lambda invocation naming apparently have been calling the wrong lambda the whole time --- terraform-unity/modules/terraform-unity-sps-airflow/main.tf | 4 ++-- .../modules/terraform-unity-sps-ogc-processes-api/main.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf 
b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index a9650e69..80658129 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -653,8 +653,8 @@ EOT data "aws_lambda_functions" "lambda_check_all" {} resource "aws_lambda_invocation" "unity_proxy_lambda_invocation" { - count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "unity-${var.venue}-httpdproxymanagement") ? 1 : 0 - function_name = "unity-${var.venue}-httpdproxymanagement" + count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "${var.project}-${var.venue}-httpdproxymanagement") ? 1 : 0 + function_name = "${var.project}-${var.venue}-httpdproxymanagement" input = "{}" triggers = { redeployment = sha1(jsonencode([ diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 3ca2992c..defca7d8 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -405,8 +405,8 @@ EOT data "aws_lambda_functions" "lambda_check_all" {} resource "aws_lambda_invocation" "unity_proxy_lambda_invocation" { - count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "unity-${var.venue}-httpdproxymanagement") ? 1 : 0 - function_name = "unity-${var.venue}-httpdproxymanagement" + count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "${var.project}-${var.venue}-httpdproxymanagement") ? 
1 : 0 + function_name = "${var.project}-${var.venue}-httpdproxymanagement" input = "{}" triggers = { redeployment = sha1(jsonencode([ From 83f560b5ad5f9c97f9462c571f7a13d7108b0aaf Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:46:43 -0500 Subject: [PATCH 17/32] Fix code formatting --- airflow/dags/cwl_dag_modular.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index a828b2eb..c3f1ac16 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -31,9 +31,7 @@ STAGE_IN_WORKFLOW = ( "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_in.cwl" ) -STAGE_OUT_WORKFLOW = ( - "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.cwl" -) +STAGE_OUT_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.cwl" LOCAL_DIR = "/shared-task-data" # The path of the working directory where the CWL workflow is executed @@ -191,21 +189,21 @@ def select_stage_out(ti): """Retrieve API key and account id from SSM parameter store.""" ssm_client = boto3.client("ssm", region_name="us-west-2") - aws_key = ssm_client.get_parameter( - Name=unity_sps_utils.DS_STAGE_OUT_AWS_KEY, WithDecryption=True - )["Parameter"]["Value"] + aws_key = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_KEY, WithDecryption=True)[ + "Parameter" + ]["Value"] logging.info("Retrieved stage out AWS access key.") ti.xcom_push(key="aws_key", value=aws_key) - aws_secret = ssm_client.get_parameter( - Name=unity_sps_utils.DS_STAGE_OUT_AWS_SECRET, WithDecryption=True - )["Parameter"]["Value"] + aws_secret = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_SECRET, WithDecryption=True)[ + "Parameter" + ]["Value"] 
logging.info("Retrieved stage out AWS access secret.") ti.xcom_push(key="aws_secret", value=aws_secret) - aws_token = ssm_client.get_parameter( - Name=unity_sps_utils.DS_STAGE_OUT_AWS_TOKEN, WithDecryption=True - )["Parameter"]["Value"] + aws_token = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_TOKEN, WithDecryption=True)[ + "Parameter" + ]["Value"] logging.info("Retrieved stage out AWS access token.") ti.xcom_push(key="aws_token", value=aws_token) @@ -271,7 +269,7 @@ def setup(ti=None, **context): "-s", "{{ ti.xcom_pull(task_ids='Setup', key='aws_secret') }}", "-t", - "{{ ti.xcom_pull(task_ids='Setup', key='aws_token') }}" + "{{ ti.xcom_pull(task_ids='Setup', key='aws_token') }}", ], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, From 2ca42c3c966eb819ba57d59a571b70c8bca08b37 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Sun, 6 Oct 2024 18:51:22 -0700 Subject: [PATCH 18/32] adjustments to work with marketplace requirements - adjusting eks module source branch - adding project/venue variables as args to eks - adding management-console required variables - fixing sps initiators module sources - adding mc-required variables to airflow terraform --- .../modules/terraform-unity-sps-eks/main.tf | 4 +++- .../terraform-unity-sps-eks/variables.tf | 20 +++++++++++++++++++ .../terraform-unity-sps-initiators/main.tf | 4 ++-- .../terraform-unity-sps-karpenter/README.md | 3 +++ .../variables.tf | 20 +++++++++++++++++++ terraform-unity/variables.tf | 20 +++++++++++++++++++ 6 files changed, 68 insertions(+), 3 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-eks/main.tf b/terraform-unity/modules/terraform-unity-sps-eks/main.tf index 777fd649..cf7f8664 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/main.tf @@ -9,8 +9,10 @@ terraform { } module "unity-eks" { - source = 
"git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=unity-sps-2.2.0-hotfix" + source = "git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=59-sps-eks-marketplace-adjustments" deployment_name = local.cluster_name + project = var.project + venue = var.venue nodegroups = var.nodegroups aws_auth_roles = [{ rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/mcp-tenantOperator" diff --git a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf index 3b36d1c3..9fe2e106 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf @@ -21,6 +21,26 @@ variable "release" { default = "24.3" } +# tflint-ignore: terraform_unused_declarations +variable "deployment_name" { + description = "The name of the deployment." + type = string +} + +# tflint-ignore: terraform_unused_declarations +variable "tags" { + description = "Tags for the deployment (unused)" + type = map(string) + default = { empty = "" } +} + +# tflint-ignore: terraform_unused_declarations +variable "installprefix" { + description = "The install prefix for the service area (unused)" + type = string + default = "" +} + variable "nodegroups" { description = "A map of node group configurations" type = map(object({ diff --git a/terraform-unity/modules/terraform-unity-sps-initiators/main.tf b/terraform-unity/modules/terraform-unity-sps-initiators/main.tf index 56893c70..0e9f6e8e 100644 --- a/terraform-unity/modules/terraform-unity-sps-initiators/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-initiators/main.tf @@ -78,7 +78,7 @@ resource "aws_s3_object" "router_config" { } module "unity_initiator" { - source = "git@github.com:unity-sds/unity-initiator.git//terraform-unity/initiator?ref=unity-sps-2.2.0" + source = 
"git::https://github.com/unity-sds/unity-initiator.git//terraform-unity/initiator?ref=unity-sps-2.2.0" code_bucket = aws_s3_bucket.code.id project = var.project router_config = "s3://${aws_s3_bucket.config.id}/${aws_s3_object.router_config.key}" @@ -91,7 +91,7 @@ resource "aws_s3_object" "isl_stacam_rawdp_folder" { } module "s3_bucket_notification" { - source = "git@github.com:unity-sds/unity-initiator.git//terraform-unity/triggers/s3-bucket-notification?ref=unity-sps-2.2.0" + source = "git::https://github.com/unity-sds/unity-initiator.git//terraform-unity/triggers/s3-bucket-notification?ref=unity-sps-2.2.0" initiator_topic_arn = module.unity_initiator.initiator_topic_arn isl_bucket = aws_s3_bucket.inbound_staging_location.id isl_bucket_prefix = "STACAM/RawDP/" diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md index c8d232be..05ff4958 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md @@ -35,10 +35,13 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"karpenter": {
"chart": "karpenter",
"repository": "oci://public.ecr.aws/karpenter",
"version": "1.0.2"
}
}
| no | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the cluster will be deployed (dev, test, prod) | `string` | n/a | yes | ## Outputs diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf index 84221fcd..a726aa87 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf @@ -21,6 +21,26 @@ variable "release" { default = "24.3" } +# tflint-ignore: terraform_unused_declarations +variable "deployment_name" { + description = "The name of the deployment." + type = string +} + +# tflint-ignore: terraform_unused_declarations +variable "tags" { + description = "Tags for the deployment (unused)" + type = map(string) + default = { empty = "" } +} + +# tflint-ignore: terraform_unused_declarations +variable "installprefix" { + description = "The install prefix for the service area (unused)" + type = string + default = "" +} + variable "helm_charts" { description = "Helm charts for the associated services." type = map(object({ diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index dfe3d4e5..97553c95 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -338,3 +338,23 @@ variable "dag_catalog_repo" { dags_directory_path = "airflow/dags" } } + +# tflint-ignore: terraform_unused_declarations +variable "deployment_name" { + description = "The name of the deployment." 
+ type = string +} + +# tflint-ignore: terraform_unused_declarations +variable "tags" { + description = "Tags for the deployment (unused)" + type = map(string) + default = { empty = "" } +} + +# tflint-ignore: terraform_unused_declarations +variable "installprefix" { + description = "The install prefix for the service area (unused)" + type = string + default = "" +} \ No newline at end of file From 5198a8cd45936e48855975372c678d0a0d3416a3 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Fri, 6 Dec 2024 22:10:02 -0800 Subject: [PATCH 19/32] adding readme bits --- terraform-unity/README.md | 3 +++ terraform-unity/modules/terraform-unity-sps-eks/README.md | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/terraform-unity/README.md b/terraform-unity/README.md index 419b6baa..1ff1446d 100644 --- a/terraform-unity/README.md +++ b/terraform-unity/README.md @@ -191,7 +191,9 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | [airflow\_webserver\_password](#input\_airflow\_webserver\_password) | The password for the Airflow webserver and UI. | `string` | n/a | yes | | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | | [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "2.2.0",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"airflow": {
"chart": "airflow",
"repository": "https://airflow.apache.org",
"version": "1.15.0"
},
"keda": {
"chart": "keda",
"repository": "https://kedacore.github.io/charts",
"version": "v2.15.1"
}
}
| no | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "300Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | | [karpenter\_node\_pools](#input\_karpenter\_node\_pools) | Configuration for Karpenter node pools |
map(object({
requirements : list(object({
key : string
operator : string
values : list(string)
}))
nodeClassRef : string
limits : object({
cpu : string
memory : string
})
disruption : object({
consolidationPolicy : string
consolidateAfter : string
})
}))
|
{
"airflow-celery-workers": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "80",
"memory": "320Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"9"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-core-components": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "40",
"memory": "160Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "100",
"memory": "400Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator-high-workload": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "528",
"memory": "1056Gi"
},
"nodeClassRef": "airflow-kubernetes-pod-operator-high-workload",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"49"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"98305"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
}
}
| no | | [kubeconfig\_filepath](#input\_kubeconfig\_filepath) | The path to the kubeconfig file for the Kubernetes cluster. | `string` | n/a | yes | @@ -200,6 +202,7 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | [project](#input\_project) | The project or mission deploying Unity SPS. | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed. | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the resources will be deployed. | `string` | n/a | yes | ## Outputs diff --git a/terraform-unity/modules/terraform-unity-sps-eks/README.md b/terraform-unity/modules/terraform-unity-sps-eks/README.md index 24bc32aa..6030c029 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/README.md +++ b/terraform-unity/modules/terraform-unity-sps-eks/README.md @@ -18,7 +18,7 @@ | Name | Source | Version | |------|--------|---------| -| [unity-eks](#module\_unity-eks) | git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.2.0-hotfix | +| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | 59-sps-eks-marketplace-adjustments | ## Resources @@ -31,10 +31,13 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the cluster will be deployed (dev, test, prod) | `string` | n/a | yes | ## Outputs From 387e909e06709e6c7dbdf3c705734204cfc1dffb Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Sun, 8 Dec 2024 23:38:31 -0800 Subject: [PATCH 20/32] fixing pre-commit complaints --- terraform-unity/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index 97553c95..da6db37e 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -357,4 +357,4 @@ variable "installprefix" { description = "The install prefix for the service area (unused)" type = string default = "" -} \ No newline at end of file +} From eb43d2f91322807ff7bda5e2a2fa63839bf5946e Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Mon, 9 Dec 2024 10:15:46 -0800 Subject: [PATCH 21/32] adding reasonable default for deployment_name --- terraform-unity/README.md | 2 +- terraform-unity/variables.tf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform-unity/README.md b/terraform-unity/README.md index 1ff1446d..742bfdab 100644 --- a/terraform-unity/README.md +++ b/terraform-unity/README.md @@ -191,7 +191,7 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | [airflow\_webserver\_password](#input\_airflow\_webserver\_password) | The password for the Airflow webserver and UI. | `string` | n/a | yes | | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | | [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "2.2.0",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | -| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"airflow": {
"chart": "airflow",
"repository": "https://airflow.apache.org",
"version": "1.15.0"
},
"keda": {
"chart": "keda",
"repository": "https://kedacore.github.io/charts",
"version": "v2.15.1"
}
}
| no | | [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "300Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index da6db37e..d0843269 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -343,6 +343,7 @@ variable "dag_catalog_repo" { variable "deployment_name" { description = "The name of the deployment." type = string + default = "" } # tflint-ignore: terraform_unused_declarations From a6c36ebf938a9819446fe64b54db949101af9ad3 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Mon, 9 Dec 2024 11:18:52 -0800 Subject: [PATCH 22/32] updating to new cs-infra tag --- terraform-unity/modules/terraform-unity-sps-eks/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform-unity/modules/terraform-unity-sps-eks/main.tf b/terraform-unity/modules/terraform-unity-sps-eks/main.tf index cf7f8664..20fae0fa 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/main.tf @@ -9,7 +9,7 @@ terraform { } module "unity-eks" { - source = "git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=59-sps-eks-marketplace-adjustments" + source = "git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=unity-sps-2.4.0" deployment_name = local.cluster_name project = var.project venue = var.venue From df59659bc054b09d97612cb0e4c5a5078b652857 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:30:00 -0500 Subject: [PATCH 23/32] Update to new DS container image - Restructure entrypoint to handle file i/o between tasks - Update DAG to pass in stage out arguments and STAC JSON - Remove entrypoint utility script --- airflow/dags/cwl_dag_modular.py | 69 ++++-------- airflow/docker/cwl/Dockerfile_modular | 2 - .../cwl/docker_cwl_entrypoint_modular.sh | 100 +++++++++--------- .../docker/cwl/docker_cwl_entrypoint_utils.py | 57 ---------- airflow/plugins/unity_sps_utils.py | 8 +- 5 files changed, 72 
insertions(+), 164 deletions(-) delete mode 100644 airflow/docker/cwl/docker_cwl_entrypoint_utils.py diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index c3f1ac16..6f1bfa7b 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -28,9 +28,7 @@ from airflow import DAG # Task constants -STAGE_IN_WORKFLOW = ( - "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/stage_in.cwl" -) +STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_in.cwl" STAGE_OUT_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.cwl" LOCAL_DIR = "/shared-task-data" @@ -40,14 +38,11 @@ WORKING_DIR = "/scratch" # Default parameters +DEFAULT_STAC_JSON = "https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/stage_in_results.json" DEFAULT_PROCESS_WORKFLOW = ( "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/process.cwl" ) DEFAULT_PROCESS_ARGS = json.dumps({"example_argument_empty": ""}) -DEFAULT_STAGE_IN_ARGS = "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/test/ogc_app_package/stage_in.yml" -DEFAULT_STAGE_OUT_ARGS = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.yaml" -DEFAULT_STAGE_OUT_BUCKET = "unity-dev-unity-unity-data" -DEFAULT_COLLECTION_ID = "example-app-collection___3" # Alternative arguments to execute SBG Pre-Process # DEFAULT_PROCESS_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" @@ -94,11 +89,11 @@ max_active_tasks=30, default_args=dag_default_args, params={ - "stage_in_args": Param( - DEFAULT_STAGE_IN_ARGS, + "stac_json": Param( + DEFAULT_STAC_JSON, type="string", - 
title="Stage in workflow parameters", - description="The stage in job parameters encoded as a JSON string or the URL of a JSON or YAML file", + title="STAC JSON", + description="STAC JSON data to download granules encoded as a JSON string or the URL of a JSON or YAML file", ), "process_workflow": Param( DEFAULT_PROCESS_WORKFLOW, @@ -114,18 +109,6 @@ "The processing job parameters encoded as a JSON string," "or the URL of a JSON or YAML file" ), ), - "stage_out_bucket": Param( - DEFAULT_STAGE_OUT_BUCKET, - type="string", - title="Stage out S3 bucket", - description="S3 bucket to stage data out to", - ), - "collection_id": Param( - DEFAULT_COLLECTION_ID, - type="string", - title="Output collection identifier", - description="Collection identifier to use for output (processed) data", - ), "request_memory": Param( "4Gi", type="string", @@ -186,26 +169,24 @@ def select_ecr(ti, use_ecr): def select_stage_out(ti): - """Retrieve API key and account id from SSM parameter store.""" + """Retrieve stage out input parameters from SSM parameter store.""" ssm_client = boto3.client("ssm", region_name="us-west-2") - aws_key = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_KEY, WithDecryption=True)[ + project = ssm_client.get_parameter(Name=unity_sps_utils.SPS_PROJECT_PARAM, WithDecryption=True)[ "Parameter" ]["Value"] - logging.info("Retrieved stage out AWS access key.") - ti.xcom_push(key="aws_key", value=aws_key) - aws_secret = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_SECRET, WithDecryption=True)[ - "Parameter" - ]["Value"] - logging.info("Retrieved stage out AWS access secret.") - ti.xcom_push(key="aws_secret", value=aws_secret) + venue = ssm_client.get_parameter(Name=unity_sps_utils.SPS_VENUE_PARAM, WithDecryption=True)["Parameter"][ + "Value" + ] - aws_token = ssm_client.get_parameter(Name=unity_sps_utils.DS_STAGE_OUT_AWS_TOKEN, WithDecryption=True)[ + staging_bucket = ssm_client.get_parameter(Name=unity_sps_utils.DS_S3_BUCKET_PARAM, 
WithDecryption=True)[ "Parameter" ]["Value"] - logging.info("Retrieved stage out AWS access token.") - ti.xcom_push(key="aws_token", value=aws_token) + + stage_out_args = json.dumps({"project": project, "venue": venue, "staging_bucket": staging_bucket}) + logging.info(f"Selecting stage out args={stage_out_args}") + ti.xcom_push(key="stage_out_args", value=stage_out_args) def setup(ti=None, **context): @@ -250,26 +231,18 @@ def setup(ti=None, **context): arguments=[ "-i", STAGE_IN_WORKFLOW, - "-k", - "{{ params.stage_in_args }}", + "-s", + "{{ params.stac_json }}", "-w", "{{ params.process_workflow }}", "-j", "{{ params.process_args }}", - "-f", + "-o", STAGE_OUT_WORKFLOW, + "-d", + "{{ ti.xcom_pull(task_ids='Setup', key='stage_out_args') }}", "-e", "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", - "-c", - "{{ params.collection_id }}", - "-b", - "{{ params.stage_out_bucket }}", - "-a", - "{{ ti.xcom_pull(task_ids='Setup', key='aws_key') }}", - "-s", - "{{ ti.xcom_pull(task_ids='Setup', key='aws_secret') }}", - "-t", - "{{ ti.xcom_pull(task_ids='Setup', key='aws_token') }}", ], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, diff --git a/airflow/docker/cwl/Dockerfile_modular b/airflow/docker/cwl/Dockerfile_modular index 6cca6389..d3d3314f 100644 --- a/airflow/docker/cwl/Dockerfile_modular +++ b/airflow/docker/cwl/Dockerfile_modular @@ -20,8 +20,6 @@ RUN apk add --no-cache nodejs npm # script to execute a generic CWL workflow with arguments COPY docker_cwl_entrypoint_modular.sh /usr/share/cwl/docker_cwl_entrypoint_modular.sh -COPY docker_cwl_entrypoint_utils.py /usr/share/cwl/docker_cwl_entrypoint_utils.py -RUN chmod +x /usr/share/cwl/docker_cwl_entrypoint_utils.py WORKDIR /usr/share/cwl ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint_modular.sh"] diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh index e474081c..e7494d7a 100755 --- 
a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh @@ -1,23 +1,23 @@ #!/bin/sh # Script to execute a CWL workflow that includes Docker containers # The Docker engine is started before the CWL execution, and stopped afterwards. -# -w: the CWL workflow URL +# -i: The CWL workflow URL for the stage in task +# -s: STAC JSON URL or JSON data that describes input data requiring download +# -w: the CWL workflow URL for the process task # (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.cwl) -# -j: a) the CWL job parameters as a JSON formatted string +# -j: a) the CWL process job parameters as a JSON formatted string # (example: { "name": "John Doe" }) # OR b) The URL of a YAML or JSON file containing the job parameters # (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.dev.yml) +# -o: The CWL workflow URL for the stage out task +# -d: The CWL stage out job parameters as a JSON formatted string # -e: the ECR login URL where the AWS account ID and region are specific to the Airflow installation # (example: .dkr.ecr..amazonaws.com) [optional] -# -o: path to an output JSON file that needs to be shared as Airflow "xcom" data [optional] -# -c: collection identifier for process task created collection and stage out task upload -# -b: stage out s3 bucket to upload processed data to -# -a: Cloudtamer API key with permissions to retrieve temporary AWS credentials -# -s: AWS account ID to retrieve credentials for +# -f: path to an output JSON file that needs to be shared as Airflow "xcom" data [optional] -# Must be the same as the path of the Persistent Volume mounted by the Airflow KubernetesPodOperator -# that executes this script -WORKING_DIR="/scratch" +# Can be the same as the path of the Persistent Volume mounted by the Airflow KubernetesPodOperator +# that executes this script to execute on EFS. 
+WORKING_DIR="/data" # Set to EBS directory get_job_args() { local job_args=$1 @@ -37,21 +37,17 @@ get_job_args() { } set -ex -while getopts i:k:w:j:e:o:c:b:a:s:f:t: flag +while getopts i:s:w:j:o:d:e:f: flag do case "${flag}" in i) cwl_workflow_stage_in=${OPTARG};; - k) job_args_stage_in=${OPTARG};; + s) stac_json=${OPTARG};; w) cwl_workflow_process=${OPTARG};; j) job_args_process=${OPTARG};; - f) cwl_workflow_stage_out=${OPTARG};; + o) cwl_workflow_stage_out=${OPTARG};; + d) job_args_stage_out=${OPTARG};; e) ecr_login=${OPTARG};; - o) json_output=${OPTARG};; - c) collection_id=${OPTARG};; - b) bucket=${OPTARG};; - a) aws_key=${OPTARG};; - s) aws_secret=${OPTARG};; - t) aws_token=${OPTARG};; + f) json_output=${OPTARG};; esac done @@ -59,17 +55,6 @@ done mkdir -p "$WORKING_DIR" cd $WORKING_DIR -# stage in job args -rm -f ./job_args_stage_in.json -job_args_stage_in="$(get_job_args "$job_args_stage_in" stage_in)" -echo JOB_ARGS_STAGE_IN $job_args_stage_in -echo "Executing the CWL workflow: $cwl_workflow_stage_in with json arguments: $job_args_stage_in and working directory: $WORKING_DIR" - -# process job args -rm -rf ./job_args_process.json -job_args_process="$(get_job_args "$job_args_process" process)" -echo "Executing the CWL workflow: $cwl_workflow_process with json arguments: $job_args_process and working directory: $WORKING_DIR" - echo "JSON XCOM output: ${json_output}" # Start Docker engine @@ -95,41 +80,52 @@ echo "Logged into: $ecr_login" fi # Stage in operations -stage_in=$(cwltool --outdir stage_in --copy-output $cwl_workflow_stage_in $job_args_stage_in) +echo "Executing the CWL workflow: $cwl_workflow_stage_in with STAC JSON: $stac_json and working directory: $WORKING_DIR" +stage_in=$(cwltool --outdir stage_in --copy-output $cwl_workflow_stage_in --download_dir granules --stac_json $stac_json) -# Get directory that contains downloads -stage_in_dir=$(echo $stage_in | jq '.stage_in_download_dir.basename') -stage_in_dir="$PWD/stage_in/$(echo "$stage_in_dir" | 
tr -d '"')" +# Retrieve directory that contains downloaded granules +stage_in_dir=$(echo $stage_in | jq '.download_dir.path') +stage_in_dir=$(echo "$stage_in_dir" | tr -d '"') echo "Stage in download directory: $stage_in_dir" ls -l $stage_in_dir/ -# Remove extraneous directory in front of catalog.json -echo "Editing stage in catalog.json" -/usr/share/cwl/docker_cwl_entrypoint_utils.py -c "$stage_in_dir/catalog.json" +# Format process job args +rm -rf ./job_args_process.json +job_args_process="$(get_job_args "$job_args_process" process)" -# Add input directory and output collection into process job arguments -echo "Editing process $job_args_process" -/usr/share/cwl/docker_cwl_entrypoint_utils.py -j $job_args_process -i $stage_in_dir -d $collection_id +# Add granule directory into process job arguments +echo "Updating process arguments with input directory: $job_args_process" +job_args_process_updated=./job_args_process_updated.json +cat $job_args_process | jq --arg data_dir $stage_in_dir '. += {"input": {"class": "Directory", "path": $data_dir}}' > $job_args_process_updated +mv $job_args_process_updated $job_args_process +echo "Executing the CWL workflow: $cwl_workflow_process with json arguments: $job_args_process and working directory: $WORKING_DIR" # Process operations -process=$(cwltool $cwl_workflow_process $job_args_process) +process=$(cwltool --outdir process $cwl_workflow_process $job_args_process) +echo $process # Get directory that contains processed files -process_dir=$(echo $process | jq '.output.basename') -process_dir="$PWD/$(echo "$process_dir" | tr -d '"')" +process_dir=$(echo $process | jq '.output.path') +process_dir=$(echo "$process_dir" | tr -d '"') echo "Process output directory: $process_dir" ls -l $process_dir +# Add process directory into stage out job arguments +echo "Editing stage out arguments: $job_args_stage_out" +echo $job_args_stage_out | jq --arg data_dir $process_dir '. 
+= {"sample_output_data": {"class": "Directory", "path": $data_dir}}' > ./job_args_stage_out.json +echo "Executing the CWL workflow: $cwl_workflow_stage_out with json arguments: job_args_stage_out.json and working directory: $WORKING_DIR" + # Stage out operations -stage_out=$(cwltool $cwl_workflow_stage_out \ - --output_dir $process_dir \ - --staging_bucket $bucket \ - --collection_id $collection_id \ - --aws_access_key_id $aws_key \ - --aws_secret_access_key $aws_secret \ - --aws_session_token $aws_token) -stage_out=$(echo "$stage_out" | jq 'map(.path)' | tr -d "[]\",\\t ") -echo "Stage out files: $stage_out" +stage_out=$(cwltool --outdir stage_out $cwl_workflow_stage_out job_args_stage_out.json) + +# Report on stage out +successful_features=$(echo "$stage_out" | jq '.successful_features.path' | tr -d "[]\",\\t ") +successful_features=$(cat $successful_features | jq '.') +echo Successful features: $successful_features + +failed_features=$(echo "$stage_out" | jq '.failed_features.path' | tr -d "[]\",\\t ") +failed_features=$(cat $failed_features | jq '.') +echo Failed features: $failed_features # Optionally, save the requested output file to a location # where it will be picked up by the Airflow XCOM mechanism diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_utils.py b/airflow/docker/cwl/docker_cwl_entrypoint_utils.py deleted file mode 100644 index d8b28437..00000000 --- a/airflow/docker/cwl/docker_cwl_entrypoint_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/share/cwl/venv/bin/python3 - -import argparse -import json - -import yaml - - -def create_args(): - """Create and return argparser.""" - - arg_parser = argparse.ArgumentParser(description="Retrieve entrypoint utilities arguments") - arg_parser.add_argument("-c", "--catalogjson", type=str, default="", help="Path to catalog JSON file") - arg_parser.add_argument("-j", "--jobargs", type=str, default="", help="Process CWL job argument file") - arg_parser.add_argument("-i", "--processinput", type=str, 
default="", help="Process input directory") - arg_parser.add_argument( - "-d", "--collectionid", type=str, default="", help="Process and stage out collection identifier" - ) - return arg_parser - - -def update_catalog_json(catalog_json): - """Remove extra root directory in catalog.json file.""" - - with open(catalog_json) as jf: - catalog_data = json.load(jf) - - for link in catalog_data["links"]: - if link["rel"] == "root": - link["href"] = "catalog.json" - - with open(catalog_json, "w") as jf: - json.dump(catalog_data, jf, indent=2) - - -def update_process_job_args(job_args, process_input, collection_id): - """Update job arguments with input directory.""" - - with open(job_args) as fh: - if job_args.endswith("yaml") or job_args.endswith("yml"): - json_data = yaml.safe_load(fh) - else: - json_data = json.load(fh) - json_data["input"] = {"class": "Directory", "path": process_input} - json_data["output_collection"] = collection_id - with open(job_args, "w") as jf: - json.dump(json_data, jf) - - -if __name__ == "__main__": - arg_parser = create_args() - args = arg_parser.parse_args() - if args.catalogjson: - update_catalog_json(args.catalogjson) - - if args.jobargs: - update_process_job_args(args.jobargs, args.processinput, args.collectionid) diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py index 4c58b65f..e4123712 100644 --- a/airflow/plugins/unity_sps_utils.py +++ b/airflow/plugins/unity_sps_utils.py @@ -14,11 +14,9 @@ NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" -DS_CLIENT_ID_PARAM = "/unity/shared-services/cognito/hysds-ui-client-id" -SS_ACT_NUM = "/unity/shared-services/aws/account" -DS_STAGE_OUT_AWS_KEY = "/unity-nikki-1/dev/sps/processing/aws-key" -DS_STAGE_OUT_AWS_SECRET = "/unity-nikki-1/dev/sps/processing/aws-secret" -DS_STAGE_OUT_AWS_TOKEN = "/unity-nikki-1/dev/sps/processing/aws-token" +SPS_PROJECT_PARAM = 
"/unity/unity-nikki-1/dev/project-name" +SPS_VENUE_PARAM = "/unity/unity-nikki-1/dev/venue-name" +DS_S3_BUCKET_PARAM = "/unity/unity-nikki-1/dev/ds/staging/s3/bucket-name" class SpsKubernetesPodOperator(KubernetesPodOperator): From 8336f995abf4dc89dcb9d98484ecba5df5377ee1 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Tue, 10 Dec 2024 07:49:01 -0700 Subject: [PATCH 24/32] Adding empty "" for deployment name for EKS and Karpenter --- terraform-unity/modules/terraform-unity-sps-eks/README.md | 4 ++-- terraform-unity/modules/terraform-unity-sps-eks/variables.tf | 1 + .../modules/terraform-unity-sps-karpenter/README.md | 2 +- .../modules/terraform-unity-sps-karpenter/variables.tf | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-eks/README.md b/terraform-unity/modules/terraform-unity-sps-eks/README.md index 6030c029..6761714a 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/README.md +++ b/terraform-unity/modules/terraform-unity-sps-eks/README.md @@ -18,7 +18,7 @@ | Name | Source | Version | |------|--------|---------| -| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | 59-sps-eks-marketplace-adjustments | +| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.4.0 | ## Resources @@ -31,7 +31,7 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | diff --git a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf index 9fe2e106..b8a4be46 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf @@ -25,6 +25,7 @@ variable "release" { variable "deployment_name" { description = "The name of the deployment." type = string + default = "" } # tflint-ignore: terraform_unused_declarations diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md index 05ff4958..5a58d601 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md @@ -35,7 +35,7 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"karpenter": {
"chart": "karpenter",
"repository": "oci://public.ecr.aws/karpenter",
"version": "1.0.2"
}
}
| no | | [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf index a726aa87..e9008955 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf @@ -25,6 +25,7 @@ variable "release" { variable "deployment_name" { description = "The name of the deployment." type = string + default = "" } # tflint-ignore: terraform_unused_declarations From 54c4936d9a41a998dd32f9c5ab5c62b5b94bb9bc Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Fri, 13 Dec 2024 23:03:53 -0800 Subject: [PATCH 25/32] commenting out jpl-internal ingress also switching ssm parameters to use shared-services URLs (and adding in the SSM lookups required to fetch those) --- .../terraform-unity-sps-airflow/README.md | 9 +++---- .../terraform-unity-sps-airflow/data.tf | 16 +++++++++++- .../terraform-unity-sps-airflow/main.tf | 25 ++++++++++++------- .../README.md | 8 +++--- .../data.tf | 16 +++++++++++- .../main.tf | 20 +++++++++------ 6 files changed, 65 insertions(+), 29 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/README.md b/terraform-unity/modules/terraform-unity-sps-airflow/README.md index bab93ee0..df3a4f89 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/README.md +++ b/terraform-unity/modules/terraform-unity-sps-airflow/README.md @@ -42,7 +42,6 @@ No modules. 
| [aws_s3_bucket.airflow_logs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/s3_bucket) | resource | | [aws_s3_bucket_policy.airflow_logs_s3_policy](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/s3_bucket_policy) | resource | | [aws_security_group.airflow_efs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | -| [aws_security_group.airflow_ingress_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group.airflow_ingress_sg_internal](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group_rule.airflow_efs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group_rule) | resource | | [aws_ssm_parameter.airflow_api_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | @@ -51,11 +50,9 @@ No modules. 
| [aws_ssm_parameter.airflow_ui_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.airflow_ui_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.unity_proxy_airflow_ui](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | -| [aws_vpc_security_group_ingress_rule.airflow_ingress_sg_jpl_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [aws_vpc_security_group_ingress_rule.airflow_ingress_sg_proxy_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [helm_release.airflow](https://registry.terraform.io/providers/hashicorp/helm/2.15.0/docs/resources/release) | resource | | [helm_release.keda](https://registry.terraform.io/providers/hashicorp/helm/2.15.0/docs/resources/release) | resource | -| [kubernetes_ingress_v1.airflow_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | [kubernetes_ingress_v1.airflow_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | [kubernetes_namespace.keda](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/namespace) | resource | | [kubernetes_persistent_volume.airflow_deployed_dags](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/persistent_volume) | resource | @@ -69,7 +66,6 @@ No modules. 
| [kubernetes_storage_class.efs](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/storage_class) | resource | | [null_resource.remove_keda_finalizers](https://registry.terraform.io/providers/hashicorp/null/3.2.3/docs/resources/resource) | resource | | [random_id.airflow_webserver_secret](https://registry.terraform.io/providers/hashicorp/random/3.6.1/docs/resources/id) | resource | -| [time_sleep.wait_after_ssm](https://registry.terraform.io/providers/hashicorp/time/0.12.1/docs/resources/sleep) | resource | | [time_sleep.wait_for_efs_mount_target_dns_propagation](https://registry.terraform.io/providers/hashicorp/time/0.12.1/docs/resources/sleep) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/caller_identity) | data source | | [aws_db_instance.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/db_instance) | data source | @@ -78,10 +74,11 @@ No modules. 
| [aws_lambda_functions.lambda_check_all](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/lambda_functions) | data source | | [aws_secretsmanager_secret_version.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/secretsmanager_secret_version) | data source | | [aws_security_groups.venue_proxy_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/security_groups) | data source | -| [aws_ssm_parameter.ssl_cert_arn](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_account](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source | -| [kubernetes_ingress_v1.airflow_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_ingress_v1.airflow_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source | diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 2e136b5a..3b805679 100644 --- 
a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -18,12 +18,13 @@ data "kubernetes_namespace" "service_area" { } } +/* Note: re-enable this to allow access via the JPL network data "kubernetes_ingress_v1" "airflow_ingress" { metadata { name = kubernetes_ingress_v1.airflow_ingress.metadata[0].name namespace = data.kubernetes_namespace.service_area.metadata[0].name } -} +}*/ data "kubernetes_ingress_v1" "airflow_ingress_internal" { metadata { @@ -44,6 +45,19 @@ data "aws_efs_file_system" "efs" { file_system_id = var.efs_file_system_id } +/* Note: re-enable this to allow access via the JPL network data "aws_ssm_parameter" "ssl_cert_arn" { name = "/unity/account/network/ssl" +}*/ + +data "aws_ssm_parameter" "shared_services_account" { + name = "/unity/shared-services/aws/account" } + +data "aws_ssm_parameter" "shared_services_region" { + name = "/unity/shared-services/aws/account/region" +} + +data "aws_ssm_parameter" "shared_services_domain" { + name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region}:${data.aws_ssm_parameter.shared_services_account}:parameter/unity/shared-services/domain" +} \ No newline at end of file diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index 80658129..a7e838bb 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -430,6 +430,7 @@ resource "helm_release" "airflow" { ] } +/* Note: re-enable this to allow access via the JPL network resource "aws_security_group" "airflow_ingress_sg" { name = "${var.project}-${var.venue}-airflow-ingress-sg" description = "SecurityGroup for Airflow LoadBalancer ingress" @@ -439,7 +440,7 @@ resource "aws_security_group" "airflow_ingress_sg" { Component = "airflow" Stack = "airflow" }) -} +}*/ resource "aws_security_group" 
"airflow_ingress_sg_internal" { name = "${var.project}-${var.venue}-airflow-internal-ingress-sg" @@ -452,6 +453,7 @@ resource "aws_security_group" "airflow_ingress_sg_internal" { }) } +/* Note: re-enable this to allow access via the JPL network #tfsec:ignore:AVD-AWS-0107 resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_jpl_rule" { for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"]) @@ -461,7 +463,7 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_jpl_rule" { from_port = local.load_balancer_port to_port = local.load_balancer_port cidr_ipv4 = each.key -} +}*/ data "aws_security_groups" "venue_proxy_sg" { filter { @@ -484,6 +486,7 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_proxy_rule" { referenced_security_group_id = data.aws_security_groups.venue_proxy_sg.ids[0] } +/* Note: re-enable this to allow access via the JPL network resource "kubernetes_ingress_v1" "airflow_ingress" { metadata { name = "airflow-ingress" @@ -521,7 +524,7 @@ resource "kubernetes_ingress_v1" "airflow_ingress" { } wait_for_load_balancer = true depends_on = [helm_release.airflow] -} +}*/ resource "kubernetes_ingress_v1" "airflow_ingress_internal" { metadata { @@ -564,12 +567,13 @@ resource "aws_ssm_parameter" "airflow_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "ui_url"]))) description = "The URL of the Airflow UI." 
type = "String" - value = "https://${data.kubernetes_ingress_v1.airflow_ingress.status[0].load_balancer[0].ingress[0].hostname}:5000" + value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_ui") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { @@ -578,8 +582,8 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow UI" - "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/health" - "landingPageUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000" + "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_ui") @@ -589,18 +593,20 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { lifecycle { ignore_changes = [value] } + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "api_url"]))) description = "The URL of the Airflow REST API." 
type = "String" - value = "https://${data.kubernetes_ingress_v1.airflow_ingress.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1" + value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_api") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { @@ -609,8 +615,8 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow API" - "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1/health" - "landingPageUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1" + "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_api") @@ -620,6 +626,7 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { lifecycle { ignore_changes = [value] } + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md index 124e403f..04390dc9 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md @@ -23,17 +23,14 @@ No modules. 
| Name | Type | |------|------| | [aws_lambda_invocation.unity_proxy_lambda_invocation](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/lambda_invocation) | resource | -| [aws_security_group.ogc_ingress_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group.ogc_ingress_sg_internal](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_ssm_parameter.ogc_processes_api_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.ogc_processes_api_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.ogc_processes_ui_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.unity_proxy_ogc_api](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | -| [aws_vpc_security_group_ingress_rule.ogc_ingress_sg_jpl_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [aws_vpc_security_group_ingress_rule.ogc_ingress_sg_proxy_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [kubernetes_deployment.ogc_processes_api](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/deployment) | resource | | [kubernetes_deployment.redis](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/deployment) | resource | -| [kubernetes_ingress_v1.ogc_processes_api_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | 
[kubernetes_ingress_v1.ogc_processes_api_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | [kubernetes_service.ogc_processes_api](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/service) | resource | | [kubernetes_service.redis](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/service) | resource | @@ -42,10 +39,11 @@ No modules. | [aws_lambda_functions.lambda_check_all](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/lambda_functions) | data source | | [aws_secretsmanager_secret_version.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/secretsmanager_secret_version) | data source | | [aws_security_groups.venue_proxy_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/security_groups) | data source | -| [aws_ssm_parameter.ssl_cert_arn](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_account](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source | -| [kubernetes_ingress_v1.ogc_processes_api_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source 
| | [kubernetes_ingress_v1.ogc_processes_api_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source | | [kubernetes_persistent_volume_claim.airflow_deployed_dags](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/persistent_volume_claim) | data source | diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf index d76a7e60..70fa123e 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf @@ -30,12 +30,13 @@ data "kubernetes_persistent_volume_claim" "airflow_deployed_dags" { } } +/* Note: re-enable this to allow access via the JPL network data "kubernetes_ingress_v1" "ogc_processes_api_ingress" { metadata { name = kubernetes_ingress_v1.ogc_processes_api_ingress.metadata[0].name namespace = data.kubernetes_namespace.service_area.metadata[0].name } -} +}*/ data "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" { metadata { @@ -44,6 +45,19 @@ data "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" { } } +/* Note: re-enable this to allow access via the JPL network data "aws_ssm_parameter" "ssl_cert_arn" { name = "/unity/account/network/ssl" +}*/ + +data "aws_ssm_parameter" "shared_services_account" { + name = "/unity/shared-services/aws/account" } + +data "aws_ssm_parameter" "shared_services_region" { + name = "/unity/shared-services/aws/account/region" +} + +data "aws_ssm_parameter" "shared_services_domain" { + name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region}:${data.aws_ssm_parameter.shared_services_account}:parameter/unity/shared-services/domain" +} \ No newline at end of 
file diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index defca7d8..efbb1edf 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -206,6 +206,7 @@ resource "kubernetes_service" "ogc_processes_api" { } } +/* Note: re-enable this to allow access via the JPL network resource "aws_security_group" "ogc_ingress_sg" { name = "${var.project}-${var.venue}-ogc-ingress-sg" description = "SecurityGroup for OGC API LoadBalancer ingress" @@ -215,7 +216,7 @@ resource "aws_security_group" "ogc_ingress_sg" { Component = "ogc" Stack = "ogc" }) -} +}*/ resource "aws_security_group" "ogc_ingress_sg_internal" { name = "${var.project}-${var.venue}-ogc-internal-ingress-sg" @@ -228,6 +229,7 @@ resource "aws_security_group" "ogc_ingress_sg_internal" { }) } +/* Note: re-enable this to allow access via the JPL network #tfsec:ignore:AVD-AWS-0107 resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_jpl_rule" { for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"]) @@ -237,7 +239,7 @@ resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_jpl_rule" { from_port = local.load_balancer_port to_port = local.load_balancer_port cidr_ipv4 = each.key -} +}*/ data "aws_security_groups" "venue_proxy_sg" { filter { @@ -260,6 +262,7 @@ resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_proxy_rule" { referenced_security_group_id = data.aws_security_groups.venue_proxy_sg.ids[0] } +/* Note: re-enable this to allow access via the JPL network resource "kubernetes_ingress_v1" "ogc_processes_api_ingress" { metadata { name = "ogc-processes-api-ingress" @@ -296,7 +299,7 @@ resource "kubernetes_ingress_v1" "ogc_processes_api_ingress" { } } wait_for_load_balancer = true -} +}*/ resource "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" { metadata { 
@@ -338,24 +341,26 @@ resource "aws_ssm_parameter" "ogc_processes_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "ui_url"]))) description = "The URL of the OGC Proccesses API Docs UI." type = "String" - value = "https://${data.kubernetes_ingress_v1.ogc_processes_api_ingress.status[0].load_balancer[0].ingress[0].hostname}:5001/redoc" + value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/redoc" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_ui") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_ogc_api] } resource "aws_ssm_parameter" "ogc_processes_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "api_url"]))) description = "The URL of the OGC Processes REST API." type = "String" - value = "https://${data.kubernetes_ingress_v1.ogc_processes_api_ingress.status[0].load_balancer[0].ingress[0].hostname}:5001" + value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_api") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_ogc_api] } resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" { @@ -364,8 +369,8 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "OGC API" - "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/health" - "landingPageUrl" : "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001" + "healthCheckUrl" : 
"https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-ogc_processes_api") @@ -375,6 +380,7 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" { lifecycle { ignore_changes = [value] } + depends_on = [aws_ssm_parameter.unity_proxy_ogc_api] } resource "aws_ssm_parameter" "unity_proxy_ogc_api" { From 7607069eebfa23aebe0a48a0a4b05d1f407a7646 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Fri, 13 Dec 2024 23:46:49 -0800 Subject: [PATCH 26/32] forgot to drill down to ssm param value --- .../modules/terraform-unity-sps-airflow/data.tf | 2 +- .../modules/terraform-unity-sps-airflow/main.tf | 12 ++++++------ .../terraform-unity-sps-ogc-processes-api/data.tf | 2 +- .../terraform-unity-sps-ogc-processes-api/main.tf | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 3b805679..95200086 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -59,5 +59,5 @@ data "aws_ssm_parameter" "shared_services_region" { } data "aws_ssm_parameter" "shared_services_domain" { - name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region}:${data.aws_ssm_parameter.shared_services_account}:parameter/unity/shared-services/domain" + name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" } \ No newline at end of file diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf 
index a7e838bb..93b8765d 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -567,7 +567,7 @@ resource "aws_ssm_parameter" "airflow_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "ui_url"]))) description = "The URL of the Airflow UI." type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/" + value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_ui") Component = "SSM" @@ -582,8 +582,8 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow UI" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/" + "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_ui") @@ -600,7 +600,7 @@ resource "aws_ssm_parameter" "airflow_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "api_url"]))) description = "The URL of the Airflow REST API." 
type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1" + value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_api") Component = "SSM" @@ -615,8 +615,8 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow API" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/sps/api/v1" + "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_api") diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf index 70fa123e..89819b1c 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf @@ -59,5 +59,5 @@ data "aws_ssm_parameter" "shared_services_region" { } data "aws_ssm_parameter" "shared_services_domain" { - name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region}:${data.aws_ssm_parameter.shared_services_account}:parameter/unity/shared-services/domain" + name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" } \ No newline at end of file diff --git 
a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index efbb1edf..9b9e1aa5 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -341,7 +341,7 @@ resource "aws_ssm_parameter" "ogc_processes_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "ui_url"]))) description = "The URL of the OGC Proccesses API Docs UI." type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/redoc" + value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/redoc" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_ui") Component = "SSM" @@ -354,7 +354,7 @@ resource "aws_ssm_parameter" "ogc_processes_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "api_url"]))) description = "The URL of the OGC Processes REST API." 
type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/" + value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_api") Component = "SSM" @@ -369,8 +369,8 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "OGC API" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain}:4443/${var.project}/${var.venue}/ogc/" + "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/health" + "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-ogc_processes_api") From 4edd5eb5e7e9878f4dd4f158cf9cb36a6c34e0ee Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Sat, 14 Dec 2024 00:04:12 -0800 Subject: [PATCH 27/32] shared services URLs are apparently all prepended by www --- .../modules/terraform-unity-sps-airflow/main.tf | 12 ++++++------ .../terraform-unity-sps-ogc-processes-api/main.tf | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index 93b8765d..47732eba 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -567,7 +567,7 @@ resource "aws_ssm_parameter" "airflow_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, 
"processing", "airflow", "ui_url"]))) description = "The URL of the Airflow UI." type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_ui") Component = "SSM" @@ -582,8 +582,8 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow UI" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" + "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/health" + "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_ui") @@ -600,7 +600,7 @@ resource "aws_ssm_parameter" "airflow_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "api_url"]))) description = "The URL of the Airflow REST API." 
type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_api") Component = "SSM" @@ -615,8 +615,8 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow API" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" + "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1/health" + "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_api") diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 9b9e1aa5..120073fe 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -341,7 +341,7 @@ resource "aws_ssm_parameter" "ogc_processes_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "ui_url"]))) description = "The URL of the OGC Proccesses API Docs UI." 
type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/redoc" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/redoc" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_ui") Component = "SSM" @@ -354,7 +354,7 @@ resource "aws_ssm_parameter" "ogc_processes_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "api_url"]))) description = "The URL of the OGC Processes REST API." type = "String" - value = "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-ogc_processes_api") Component = "SSM" @@ -369,8 +369,8 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "OGC API" - "healthCheckUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/health" - "landingPageUrl" : "https://${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" + "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/health" + "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-ogc_processes_api") From cd916bf672f34d8ea26a19a5f231abf41954be78 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Sat, 14 Dec 2024 00:10:03 -0800 Subject: [PATCH 28/32] fixing pre-commit newline complaint 
--- terraform-unity/modules/terraform-unity-sps-airflow/data.tf | 2 +- .../modules/terraform-unity-sps-ogc-processes-api/data.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 95200086..544ce619 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -60,4 +60,4 @@ data "aws_ssm_parameter" "shared_services_region" { data "aws_ssm_parameter" "shared_services_domain" { name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" -} \ No newline at end of file +} diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf index 89819b1c..afd3309b 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf @@ -60,4 +60,4 @@ data "aws_ssm_parameter" "shared_services_region" { data "aws_ssm_parameter" "shared_services_domain" { name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" -} \ No newline at end of file +} From 71429ed0c5cb0802131a7fef7f15c338b8df2259 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:29:50 -0500 Subject: [PATCH 29/32] Define new stage in and stage out CWL workflows --- airflow/dags/cwl_dag_modular.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 6f1bfa7b..8d9898fc 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ 
-28,8 +28,8 @@ from airflow import DAG # Task constants -STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_in.cwl" -STAGE_OUT_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/219-process-task/demos/cwl_dag_stage_out.cwl" +STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/220-stage-in-task/demos/cwl_dag_modular_stage_in.cwl" +STAGE_OUT_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/220-stage-in-task/demos/cwl_dag_modular_stage_out.cwl" LOCAL_DIR = "/shared-task-data" # The path of the working directory where the CWL workflow is executed From 6bdd7355913c218c1574e3abfcd47a421405110a Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:14:01 -0500 Subject: [PATCH 30/32] Remove SSM parameter query for project and venue and define from environment variable --- airflow/dags/cwl_dag_modular.py | 10 ++-------- airflow/plugins/unity_sps_utils.py | 2 -- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 8d9898fc..4938fdb3 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -172,14 +172,8 @@ def select_stage_out(ti): """Retrieve stage out input parameters from SSM parameter store.""" ssm_client = boto3.client("ssm", region_name="us-west-2") - project = ssm_client.get_parameter(Name=unity_sps_utils.SPS_PROJECT_PARAM, WithDecryption=True)[ - "Parameter" - ]["Value"] - - venue = ssm_client.get_parameter(Name=unity_sps_utils.SPS_VENUE_PARAM, WithDecryption=True)["Parameter"][ - "Value" - ] - + project = os.environ["AIRFLOW_VAR_UNITY_PROJECT"] + venue = os.environ["AIRFLOW_VAR_UNITY_VENUE"] staging_bucket = ssm_client.get_parameter(Name=unity_sps_utils.DS_S3_BUCKET_PARAM, WithDecryption=True)[ "Parameter" 
]["Value"] diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py index e4123712..914c9dad 100644 --- a/airflow/plugins/unity_sps_utils.py +++ b/airflow/plugins/unity_sps_utils.py @@ -14,8 +14,6 @@ NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" -SPS_PROJECT_PARAM = "/unity/unity-nikki-1/dev/project-name" -SPS_VENUE_PARAM = "/unity/unity-nikki-1/dev/venue-name" DS_S3_BUCKET_PARAM = "/unity/unity-nikki-1/dev/ds/staging/s3/bucket-name" From 27591438f8d989c6e1ec183c603fafe0a45dd538 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 17 Dec 2024 10:33:17 -0500 Subject: [PATCH 31/32] Update stage in STAC JSON default --- airflow/dags/cwl_dag_modular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py index 4938fdb3..6a1d06d0 100644 --- a/airflow/dags/cwl_dag_modular.py +++ b/airflow/dags/cwl_dag_modular.py @@ -38,7 +38,7 @@ WORKING_DIR = "/scratch" # Default parameters -DEFAULT_STAC_JSON = "https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/stage_in_results.json" +DEFAULT_STAC_JSON = "https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/refs/heads/main/test/stage_in/stage_in_results.json" DEFAULT_PROCESS_WORKFLOW = ( "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/process.cwl" ) From 6472ce5a9a4c0793a90dd3c51ce8742257f358f3 Mon Sep 17 00:00:00 2001 From: Bradley Lunsford Date: Thu, 19 Dec 2024 09:40:19 -0800 Subject: [PATCH 32/32] adding venue-level proxy printout to outputs --- .../modules/terraform-unity-sps-airflow/README.md | 2 ++ .../modules/terraform-unity-sps-airflow/data.tf | 4 ++++ .../modules/terraform-unity-sps-airflow/outputs.tf | 12 ++++++++++++ .../terraform-unity-sps-ogc-processes-api/README.md | 2 ++ 
.../terraform-unity-sps-ogc-processes-api/data.tf | 4 ++++ .../terraform-unity-sps-ogc-processes-api/outputs.tf | 12 ++++++++++++ terraform-unity/outputs.tf | 4 ++++ 7 files changed, 40 insertions(+) diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/README.md b/terraform-unity/modules/terraform-unity-sps-airflow/README.md index df3a4f89..0b8da49a 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/README.md +++ b/terraform-unity/modules/terraform-unity-sps-airflow/README.md @@ -78,6 +78,7 @@ No modules. | [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.venue_proxy_baseurl](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source | | [kubernetes_ingress_v1.airflow_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source | @@ -107,5 +108,6 @@ No modules. |------|-------------| | [airflow\_deployed\_dags\_pvc](#output\_airflow\_deployed\_dags\_pvc) | n/a | | [airflow\_urls](#output\_airflow\_urls) | SSM parameter IDs and URLs for the various Airflow endpoints. | +| [airflow\_venue\_urls](#output\_airflow\_venue\_urls) | URLs for the various Airflow endpoints at venue-proxy level. 
| | [s3\_buckets](#output\_s3\_buckets) | SSM parameter IDs and bucket names for the various buckets used in the pipeline. | diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 544ce619..d93317ff 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -61,3 +61,7 @@ data "aws_ssm_parameter" "shared_services_region" { data "aws_ssm_parameter" "shared_services_domain" { name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" } + +data "aws_ssm_parameter" "venue_proxy_baseurl" { + name = "/unity/${var.project}/${var.venue}/management/httpd/loadbalancer-url" +} diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf b/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf index 654d726f..f7ed98c0 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf @@ -12,6 +12,18 @@ output "airflow_urls" { } } +output "airflow_venue_urls" { + description = "URLs for the various Airflow endpoints at venue-proxy level." + value = { + "ui" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "sps/")) + } + "rest_api" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "sps/api/v1")) + } + } +} + output "s3_buckets" { description = "SSM parameter IDs and bucket names for the various buckets used in the pipeline." 
value = { diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md index 04390dc9..775938a2 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md @@ -43,6 +43,7 @@ No modules. | [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.venue_proxy_baseurl](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source | | [kubernetes_ingress_v1.ogc_processes_api_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source | @@ -71,4 +72,5 @@ No modules. | Name | Description | |------|-------------| | [ogc\_processes\_urls](#output\_ogc\_processes\_urls) | SSM parameter IDs and URLs for the various OGC Processes endpoints. | +| [ogc\_processes\_venue\_urls](#output\_ogc\_processes\_venue\_urls) | URLs for the various OGC Processes endpoints at venue-proxy level. 
| diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf index afd3309b..d29c4d91 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf @@ -61,3 +61,7 @@ data "aws_ssm_parameter" "shared_services_region" { data "aws_ssm_parameter" "shared_services_domain" { name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" } + +data "aws_ssm_parameter" "venue_proxy_baseurl" { + name = "/unity/${var.project}/${var.venue}/management/httpd/loadbalancer-url" +} diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf index a8302464..e4e892d0 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf @@ -11,3 +11,15 @@ output "ogc_processes_urls" { } } } + +output "ogc_processes_venue_urls" { + description = "URLs for the various OGC Processes endpoints at venue-proxy level." 
+ value = { + "ui" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "ogc/redoc")) + } + "rest_api" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "ogc/")) + } + } +} diff --git a/terraform-unity/outputs.tf b/terraform-unity/outputs.tf index 4d1eeb00..b0e84a8d 100644 --- a/terraform-unity/outputs.tf +++ b/terraform-unity/outputs.tf @@ -5,6 +5,10 @@ output "resources" { "airflow" = module.unity-sps-airflow.airflow_urls "ogc_processes" = module.unity-sps-ogc-processes-api.ogc_processes_urls } + "venue_endpoints" = { + "airflow" = module.unity-sps-airflow.airflow_venue_urls + "ogc_processes" = module.unity-sps-ogc-processes-api.ogc_processes_venue_urls + } "buckets" = module.unity-sps-airflow.s3_buckets } }