diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
index 428391b6..6986a2ee 100644
--- a/.github/workflows/build_docker_images.yml
+++ b/.github/workflows/build_docker_images.yml
@@ -13,6 +13,7 @@ env:
   TAG: ${{ github.event.inputs.tag }}
   SPS_AIRFLOW: ${{ github.repository }}/sps-airflow
   SPS_DOCKER_CWL: ${{ github.repository }}/sps-docker-cwl
+  SPS_DOCKER_CWL_MODULAR: ${{ github.repository }}/sps-docker-cwl-modular

 jobs:
   build-sps-airflow:
@@ -61,3 +62,26 @@ jobs:
           push: true
           tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}:${{ env.TAG }}
           labels: ${{ steps.metascheduler.outputs.labels }}
+  build-sps-docker-cwl-modular:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for SPS Docker CWL modular image
+        id: metascheduler
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL_MODULAR }}
+      - name: Build and push SPS Docker CWL modular image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./airflow/docker/cwl
+          file: airflow/docker/cwl/Dockerfile_modular
+          push: true
+          tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL_MODULAR }}:${{ env.TAG }}
+          labels: ${{ steps.metascheduler.outputs.labels }}
diff --git a/airflow/dags/cwl_dag_modular.py b/airflow/dags/cwl_dag_modular.py
index bd2eb466..6a1d06d0 100644
--- a/airflow/dags/cwl_dag_modular.py
+++ b/airflow/dags/cwl_dag_modular.py
@@ -3,8 +3,11 @@
 The Airflow KubernetesPodOperator starts a Docker container that includes the Docker engine
 and the CWL libraries. The "cwl-runner" tool is invoked to execute the CWL workflow.

-Parameter cwl_workflow: the URL of the CWL workflow to execute.
-Parameter args_as_json: JSON string contained the specific values for the workflow specific inputs.
+Parameter stac_json: the STAC JSON used to select and download the input granules,
+encoded as a JSON string or as the URL of a JSON or YAML file.
+Parameter process_workflow: the URL of the CWL processing workflow to execute.
+Parameter process_args: JSON string containing the values for the processing workflow
+inputs, or the URL of a JSON or YAML file.
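+
+The DAG runs stage in (granule download), processing, and stage out (product upload)
+as three successive CWL executions inside a single Kubernetes pod; see
+airflow/docker/cwl/docker_cwl_entrypoint_modular.sh for the execution sequence.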
""" import json @@ -25,8 +28,8 @@ from airflow import DAG # Task constants -UNITY_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-unity/stage-in-workflow.cwl" -DAAC_STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-data-services/refs/heads/cwl-examples/cwl/stage-in-daac/stage-in-workflow.cwl" +STAGE_IN_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/220-stage-in-task/demos/cwl_dag_modular_stage_in.cwl" +STAGE_OUT_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/refs/heads/220-stage-in-task/demos/cwl_dag_modular_stage_out.cwl" LOCAL_DIR = "/shared-task-data" # The path of the working directory where the CWL workflow is executed @@ -35,21 +38,19 @@ WORKING_DIR = "/scratch" # Default parameters -DEFAULT_CWL_WORKFLOW = ( - "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/main/demos/echo_message.cwl" +DEFAULT_STAC_JSON = "https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/refs/heads/main/test/stage_in/stage_in_results.json" +DEFAULT_PROCESS_WORKFLOW = ( + "https://raw.githubusercontent.com/mike-gangl/unity-OGC-example-application/refs/heads/main/process.cwl" ) -DEFAULT_CWL_ARGUMENTS = json.dumps({"message": "Hello Unity"}) -DEFAULT_STAC_JSON_URL = "https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/EMITL1BRAD_001/items?limit=2" -DEFAULT_INPUT_LOCATION = "daac" - +DEFAULT_PROCESS_ARGS = json.dumps({"example_argument_empty": ""}) # Alternative arguments to execute SBG Pre-Process -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" -# DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" +# DEFAULT_PROCESS_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" +# DEFAULT_PROCESS_ARGS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" # Alternative arguments to execute SBG end-to-end -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.cwl" -# DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.dev.yml" +# DEFAULT_PROCESS_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.cwl" +# DEFAULT_PROCESS_ARGS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.dev.yml" # Alternative arguments to execute SBG end-to-end # unity_sps_sbg_debug.txt @@ -67,13 +68,6 @@ # "ephemeral-storage": "30Gi" # }, ) -STAGE_IN_CONTAINER_RESOURCES = k8s.V1ResourceRequirements( - requests={ - "memory": "4Gi", - "cpu": "4", - "ephemeral-storage": "{{ params.request_storage }}", - } -) # Default DAG configuration dag_default_args = { @@ -95,14 +89,25 @@ max_active_tasks=30, default_args=dag_default_args, params={ - "cwl_workflow": Param( - DEFAULT_CWL_WORKFLOW, type="string", title="CWL workflow", description="The CWL workflow URL" + "stac_json": Param( + DEFAULT_STAC_JSON, + type="string", + title="STAC JSON", + description="STAC JSON data to download granules encoded as a JSON string or the URL of a JSON or YAML file", + ), + "process_workflow": Param( + DEFAULT_PROCESS_WORKFLOW, + type="string", + title="Processing workflow", + description="The processing workflow URL", ), - "cwl_args": Param( - 
-            DEFAULT_CWL_ARGUMENTS,
+        "process_args": Param(
+            DEFAULT_PROCESS_ARGS,
             type="string",
-            title="CWL workflow parameters",
-            description=("The job parameters encoded as a JSON string," "or the URL of a JSON or YAML file"),
+            title="Processing workflow parameters",
+            description=(
+                "The processing job parameters encoded as a JSON string, " "or the URL of a JSON or YAML file"
+            ),
         ),
         "request_memory": Param(
             "4Gi",
@@ -123,42 +128,27 @@
             title="Docker container storage",
         ),
         "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"),
-        "stac_json_url": Param(
-            DEFAULT_STAC_JSON_URL,
-            type="string",
-            title="STAC JSON URL",
-            description="The URL to the STAC JSON document",
-        ),
-        "input_location": Param(
-            DEFAULT_INPUT_LOCATION,
-            type="string",
-            enum=["daac", "unity"],
-            title="Input data location",
-            description="Indicate whether input data should be retrieved from a DAAC or Unity",
-        ),
     },
 )


-def setup(ti=None, **context):
+def create_local_dir(dag_run_id):
     """
-    Task that creates the working directory on the shared volume
-    and parses the input parameter values.
+    Create local directory for working DAG data.
     """
-    context = get_current_context()
-    dag_run_id = context["dag_run"].run_id
     local_dir = f"{LOCAL_DIR}/{dag_run_id}"
-    logging.info(f"Creating directory: {local_dir}")
     os.makedirs(local_dir, exist_ok=True)
     logging.info(f"Created directory: {local_dir}")

-    # select the node pool based on what resources were requested
+
+def select_node_pool(ti, request_storage, request_memory, request_cpu):
+    """
+    Select node pool based on resources requested in input parameters.
+    """
     node_pool = unity_sps_utils.NODE_POOL_DEFAULT
-    storage = context["params"]["request_storage"]  # 100Gi
-    storage = int(storage[0:-2])  # 100
-    memory = context["params"]["request_memory"]  # 32Gi
-    memory = int(memory[0:-2])  # 32
-    cpu = int(context["params"]["request_cpu"])  # 8
+    storage = int(request_storage[0:-2])  # 100Gi -> 100
+    memory = int(request_memory[0:-2])  # 32Gi -> 32
+    cpu = int(request_cpu)  # 8

     logging.info(f"Requesting storage={storage}Gi memory={memory}Gi CPU={cpu}")
     if (storage > 30) or (memory > 32) or (cpu > 8):
@@ -166,84 +156,60 @@ def setup(ti=None, **context):
         logging.info(f"Selecting node pool={node_pool}")
     ti.xcom_push(key="node_pool_processing", value=node_pool)

-    # select "use_ecr" argument and determine if ECR login is required
-    logging.info("Use ECR: %s", context["params"]["use_ecr"])
-    if context["params"]["use_ecr"]:
+
+def select_ecr(ti, use_ecr):
+    """
+    Determine if ECR login is required.
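+    If use_ecr is true, the registry URI is read from the AIRFLOW_VAR_ECR_URI
+    environment variable and pushed to XCom under the "ecr_login" key.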
+ """ + logging.info("Use ECR: %s", use_ecr) + if use_ecr: ecr_login = os.environ["AIRFLOW_VAR_ECR_URI"] ti.xcom_push(key="ecr_login", value=ecr_login) logging.info("ECR login: %s", ecr_login) - # define stage in arguments - stage_in_args = {"download_dir": "input", "stac_json": context["params"]["stac_json_url"]} - # select stage in workflow based on input location - if context["params"]["input_location"] == "daac": - stage_in_workflow = DAAC_STAGE_IN_WORKFLOW - else: - stage_in_workflow = UNITY_STAGE_IN_WORKFLOW - ssm_client = boto3.client("ssm", region_name="us-west-2") - ss_acct_num = ssm_client.get_parameter(Name=unity_sps_utils.SS_ACT_NUM, WithDecryption=True)[ - "Parameter" - ]["Value"] - unity_client_id = ssm_client.get_parameter( - Name=f"arn:aws:ssm:us-west-2:{ss_acct_num}:parameter{unity_sps_utils.DS_CLIENT_ID_PARAM}", - WithDecryption=True, - )["Parameter"]["Value"] - stage_in_args["unity_client_id"] = unity_client_id +def select_stage_out(ti): + """Retrieve stage out input parameters from SSM parameter store.""" + ssm_client = boto3.client("ssm", region_name="us-west-2") - ti.xcom_push(key="stage_in_workflow", value=stage_in_workflow) - logging.info("Stage In workflow selected: %s", stage_in_workflow) + project = os.environ["AIRFLOW_VAR_UNITY_PROJECT"] + venue = os.environ["AIRFLOW_VAR_UNITY_VENUE"] + staging_bucket = ssm_client.get_parameter(Name=unity_sps_utils.DS_S3_BUCKET_PARAM, WithDecryption=True)[ + "Parameter" + ]["Value"] - ti.xcom_push(key="stage_in_args", value=stage_in_args) - logging.info("Stage in arguments selected: %s", stage_in_args) + stage_out_args = json.dumps({"project": project, "venue": venue, "staging_bucket": staging_bucket}) + logging.info(f"Selecting stage out args={stage_out_args}") + ti.xcom_push(key="stage_out_args", value=stage_out_args) -setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) +def setup(ti=None, **context): + """ + Task that creates the working directory on the shared volume + and parses the input parameter values. 
+ """ + context = get_current_context() + # create local working directory + dag_run_id = context["dag_run"].run_id + create_local_dir(dag_run_id) -cwl_task_stage_in = unity_sps_utils.SpsKubernetesPodOperator( - retries=0, - task_id="cwl_task_stage_in", - namespace=unity_sps_utils.POD_NAMESPACE, - name="cwl-task-pod", - image=unity_sps_utils.SPS_DOCKER_CWL_IMAGE, - service_account_name="airflow-worker", - in_cluster=True, - get_logs=True, - startup_timeout_seconds=1800, - arguments=[ - "-w", - "{{ ti.xcom_pull(task_ids='Setup', key='stage_in_workflow') }}", - "-j", - "{{ ti.xcom_pull(task_ids='Setup', key='stage_in_args') }}", - "-e", - "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", - ], - container_security_context={"privileged": True}, - container_resources=STAGE_IN_CONTAINER_RESOURCES, - container_logs=True, - volume_mounts=[ - k8s.V1VolumeMount(name="workers-volume", mount_path=WORKING_DIR, sub_path="{{ dag_run.run_id }}") - ], - volumes=[ - k8s.V1Volume( - name="workers-volume", - persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name="airflow-kpo"), - ) - ], - dag=dag, - node_selector={"karpenter.sh/nodepool": unity_sps_utils.NODE_POOL_DEFAULT}, - labels={"app": unity_sps_utils.POD_LABEL}, - annotations={"karpenter.sh/do-not-disrupt": "true"}, - # note: 'affinity' cannot yet be templated - affinity=unity_sps_utils.get_affinity( - capacity_type=["spot"], - # instance_type=["t3.2xlarge"], - anti_affinity_label=unity_sps_utils.POD_LABEL, - ), - on_finish_action="keep_pod", - is_delete_operator_pod=False, -) + # select the node pool based on what resources were requested + select_node_pool( + ti, + context["params"]["request_storage"], + context["params"]["request_memory"], + context["params"]["request_cpu"], + ) + + # select "use_ecr" argument and determine if ECR login is required + select_ecr(ti, context["params"]["use_ecr"]) + + # retrieve stage out aws api key and account id + select_stage_out(ti) + + +setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) cwl_task_processing = unity_sps_utils.SpsKubernetesPodOperator( @@ -251,16 +217,24 @@ def setup(ti=None, **context): task_id="cwl_task_processing", namespace=unity_sps_utils.POD_NAMESPACE, name="cwl-task-pod", - image=unity_sps_utils.SPS_DOCKER_CWL_IMAGE, + image=unity_sps_utils.SPS_DOCKER_CWL_IMAGE_MODULAR, service_account_name="airflow-worker", in_cluster=True, get_logs=True, startup_timeout_seconds=1800, arguments=[ + "-i", + STAGE_IN_WORKFLOW, + "-s", + "{{ params.stac_json }}", "-w", - "{{ params.cwl_workflow }}", + "{{ params.process_workflow }}", "-j", - "{{ params.cwl_args }}", + "{{ params.process_args }}", + "-o", + STAGE_OUT_WORKFLOW, + "-d", + "{{ ti.xcom_pull(task_ids='Setup', key='stage_out_args') }}", "-e", "{{ ti.xcom_pull(task_ids='Setup', key='ecr_login') }}", ], @@ -313,6 +287,5 @@ def cleanup(**context): task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) -chain( - setup_task.as_setup(), cwl_task_stage_in, cwl_task_processing, cleanup_task.as_teardown(setups=setup_task) -) + +chain(setup_task.as_setup(), cwl_task_processing, cleanup_task.as_teardown(setups=setup_task)) diff --git a/airflow/docker/cwl/Dockerfile_modular b/airflow/docker/cwl/Dockerfile_modular new file mode 100644 index 00000000..d3d3314f --- /dev/null +++ b/airflow/docker/cwl/Dockerfile_modular @@ -0,0 +1,25 @@ +# docker:dind Dockerfile: https://github.com/docker-library/docker/blob/master/Dockerfile-dind.template +# FROM docker:dind +FROM docker:25.0.3-dind 
+
+# install Python
+RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
+RUN apk add gcc musl-dev linux-headers python3-dev jq
+RUN apk add --no-cache python3 py3-pip
+RUN apk add vim
+
+# install CWL libraries
+RUN mkdir /usr/share/cwl \
+    && cd /usr/share/cwl \
+    && python -m venv venv \
+    && source venv/bin/activate \
+    && pip install cwltool cwl-runner docker boto3 awscli pyyaml
+
+# install nodejs to parse Javascript in CWL files
+RUN apk add --no-cache nodejs npm
+
+# script to execute a generic CWL workflow with arguments
+COPY docker_cwl_entrypoint_modular.sh /usr/share/cwl/docker_cwl_entrypoint_modular.sh
+
+WORKDIR /usr/share/cwl
+ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint_modular.sh"]
diff --git a/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh
new file mode 100755
index 00000000..e7494d7a
--- /dev/null
+++ b/airflow/docker/cwl/docker_cwl_entrypoint_modular.sh
@@ -0,0 +1,141 @@
+#!/bin/sh
+# Script to execute a CWL workflow that includes Docker containers
+# The Docker engine is started before the CWL execution, and stopped afterwards.
+# -i: The CWL workflow URL for the stage in task
+# -s: STAC JSON URL or JSON data that describes input data requiring download
+# -w: the CWL workflow URL for the process task
+#     (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.cwl)
+# -j: a) the CWL process job parameters as a JSON formatted string
+#        (example: { "name": "John Doe" })
+#     OR b) The URL of a YAML or JSON file containing the job parameters
+#        (example: https://github.com/unity-sds/sbg-workflows/blob/main/L1-to-L2-e2e.dev.yml)
+# -o: The CWL workflow URL for the stage out task
+# -d: The CWL stage out job parameters as a JSON formatted string
+# -e: the ECR login URL where the AWS account ID and region are specific to the Airflow installation
+#     (example: <account_id>.dkr.ecr.<region>.amazonaws.com) [optional]
+# -f: path to an output JSON file that needs to be shared as Airflow "xcom" data [optional]
+
+# Can be the same as the path of the Persistent Volume mounted by the Airflow KubernetesPodOperator
+# that executes this script to execute on EFS.
+WORKING_DIR="/data" # Set to EBS directory
+
+get_job_args() {
+  local job_args=$1
+  workflow=$2
+  # switch between the 2 cases a) and b) for job_args
+  if [ "$job_args" = "${job_args#{}" ]
+  then
+    # job_args does NOT start with '{': it is the URL or path of a parameters file
+    job_args_file=$job_args
+  else
+    # job_args starts with '{': write the inline JSON to a local file
+    echo "$job_args" > ./job_args_$workflow.json
+    job_args_file="./job_args_$workflow.json"
+  fi
+  echo $job_args_file
+}
+
+set -ex
+while getopts i:s:w:j:o:d:e:f: flag
+do
+  case "${flag}" in
+    i) cwl_workflow_stage_in=${OPTARG};;
+    s) stac_json=${OPTARG};;
+    w) cwl_workflow_process=${OPTARG};;
+    j) job_args_process=${OPTARG};;
+    o) cwl_workflow_stage_out=${OPTARG};;
+    d) job_args_stage_out=${OPTARG};;
+    e) ecr_login=${OPTARG};;
+    f) json_output=${OPTARG};;
+  esac
+done
+
+# create working directory if it doesn't exist
+mkdir -p "$WORKING_DIR"
+cd $WORKING_DIR
+
+echo "JSON XCOM output: ${json_output}"
+
+# Start Docker engine
+dockerd > dockerd-logfile 2>&1 &
+
+# Wait until Docker engine is running
+# Loop until 'docker version' exits with 0.
+until docker version > /dev/null 2>&1
+do
+  sleep 1
+done
+
+# Activate Python virtual environments for executables
+. /usr/share/cwl/venv/bin/activate
+
+# Log into AWS ECR repository
+if [ "$ecr_login" != "None" ]; then
+IFS=. read account_id dkr ecr aws_region amazonaws com <<EOF
+${ecr_login}
+EOF
+aws ecr get-login-password --region $aws_region | docker login --username AWS --password-stdin $ecr_login
+echo "Logged into: $ecr_login"
+fi
+
+# Stage in operations: download the granules referenced by the STAC JSON
+echo "Executing the CWL workflow: $cwl_workflow_stage_in with STAC JSON: $stac_json and working directory: $WORKING_DIR"
+stage_in=$(cwltool --outdir stage_in $cwl_workflow_stage_in --stac_json "$stac_json")
+echo $stage_in
+
+# Get directory that contains the staged in files
+# (the output id parsed here must match the one declared by the stage in CWL workflow)
+stage_in_dir=$(echo $stage_in | jq '.stage_in_download_dir.path' | tr -d '"')
+echo "Stage in output directory: $stage_in_dir"
+
+# Add the stage in directory to the process job arguments
+# (the "input" key must match the directory input declared by the process CWL workflow)
+job_args_process=$(get_job_args "$job_args_process" process)
+job_args_process_updated=./job_args_process_updated.json
+cat $job_args_process | jq --arg data_dir $stage_in_dir '. += {"input": {"class": "Directory", "path": $data_dir}}' > $job_args_process_updated
+mv $job_args_process_updated $job_args_process
+echo "Executing the CWL workflow: $cwl_workflow_process with json arguments: $job_args_process and working directory: $WORKING_DIR"
+
+# Process operations
+process=$(cwltool --outdir process $cwl_workflow_process $job_args_process)
+echo $process
+
+# Get directory that contains processed files
+process_dir=$(echo $process | jq '.output.path')
+process_dir=$(echo "$process_dir" | tr -d '"')
+echo "Process output directory: $process_dir"
+ls -l $process_dir
+
+# Add process directory into stage out job arguments
+echo "Editing stage out arguments: $job_args_stage_out"
+echo $job_args_stage_out | jq --arg data_dir $process_dir '. += {"sample_output_data": {"class": "Directory", "path": $data_dir}}' > ./job_args_stage_out.json
+echo "Executing the CWL workflow: $cwl_workflow_stage_out with json arguments: job_args_stage_out.json and working directory: $WORKING_DIR"
+
+# Stage out operations
+stage_out=$(cwltool --outdir stage_out $cwl_workflow_stage_out job_args_stage_out.json)
+
+# Report on stage out
+successful_features=$(echo "$stage_out" | jq '.successful_features.path' | tr -d "[]\",\\t ")
+successful_features=$(cat $successful_features | jq '.')
+echo Successful features: $successful_features
+
+failed_features=$(echo "$stage_out" | jq '.failed_features.path' | tr -d "[]\",\\t ")
+failed_features=$(cat $failed_features | jq '.')
+echo Failed features: $failed_features
+
+# Optionally, save the requested output file to a location
+# where it will be picked up by the Airflow XCOM mechanism
+# Note: the content of the file MUST be valid JSON or XCOM will fail.
+if [ ! -z "${json_output}" -a "${json_output}" != " " ]; then
+  mkdir -p /airflow/xcom/
+  cp ${json_output} /airflow/xcom/return.json
+fi
+
+deactivate
+
+# Stop Docker engine
+pkill -f dockerd
diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml
index 449b57a2..d87fa1f9 100644
--- a/airflow/helm/values.tmpl.yaml
+++ b/airflow/helm/values.tmpl.yaml
@@ -250,6 +250,8 @@ config:
     encrypt_s3_logs: false
   celery:
     worker_concurrency: 16
+  webserver:
+    enable_proxy_fix: 'True'

 dags:
   persistence:
diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py
index b06555f5..914c9dad 100644
--- a/airflow/plugins/unity_sps_utils.py
+++ b/airflow/plugins/unity_sps_utils.py
@@ -9,12 +9,12 @@
 POD_NAMESPACE = "sps"  # The Kubernetes namespace within which the Pod is run (it must already exist)

 POD_LABEL = "cwl_task"
 SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.3.0"
+SPS_DOCKER_CWL_IMAGE_MODULAR = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl-modular:2.3.0"

 NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator"
 NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload"

-DS_CLIENT_ID_PARAM = "/unity/shared-services/cognito/hysds-ui-client-id"
-SS_ACT_NUM = "/unity/shared-services/aws/account"
+DS_S3_BUCKET_PARAM = "/unity/unity-nikki-1/dev/ds/staging/s3/bucket-name"


 class SpsKubernetesPodOperator(KubernetesPodOperator):
diff --git a/terraform-unity/README.md b/terraform-unity/README.md
index 419b6baa..742bfdab 100644
--- a/terraform-unity/README.md
+++ b/terraform-unity/README.md
@@ -191,7 +191,9 @@ terraform apply -no-color 2>&1 | tee apply_output.txt
| [airflow\_webserver\_password](#input\_airflow\_webserver\_password) | The password for the Airflow webserver and UI.
| `string` | n/a | yes | | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | | [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "2.2.0",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"airflow": {
"chart": "airflow",
"repository": "https://airflow.apache.org",
"version": "1.15.0"
},
"keda": {
"chart": "keda",
"repository": "https://kedacore.github.io/charts",
"version": "v2.15.1"
}
}
| no | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "300Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | | [karpenter\_node\_pools](#input\_karpenter\_node\_pools) | Configuration for Karpenter node pools |
map(object({
requirements : list(object({
key : string
operator : string
values : list(string)
}))
nodeClassRef : string
limits : object({
cpu : string
memory : string
})
disruption : object({
consolidationPolicy : string
consolidateAfter : string
})
}))
|
{
"airflow-celery-workers": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "80",
"memory": "320Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"9"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-core-components": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "40",
"memory": "160Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "100",
"memory": "400Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator-high-workload": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "528",
"memory": "1056Gi"
},
"nodeClassRef": "airflow-kubernetes-pod-operator-high-workload",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"49"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"98305"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
}
}
| no | | [kubeconfig\_filepath](#input\_kubeconfig\_filepath) | The path to the kubeconfig file for the Kubernetes cluster. | `string` | n/a | yes | @@ -200,6 +202,7 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | [project](#input\_project) | The project or mission deploying Unity SPS. | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed. | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the resources will be deployed. | `string` | n/a | yes | ## Outputs diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/README.md b/terraform-unity/modules/terraform-unity-sps-airflow/README.md index f8db6278..0b8da49a 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/README.md +++ b/terraform-unity/modules/terraform-unity-sps-airflow/README.md @@ -42,7 +42,6 @@ No modules. | [aws_s3_bucket.airflow_logs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/s3_bucket) | resource | | [aws_s3_bucket_policy.airflow_logs_s3_policy](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/s3_bucket_policy) | resource | | [aws_security_group.airflow_efs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | -| [aws_security_group.airflow_ingress_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group.airflow_ingress_sg_internal](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group_rule.airflow_efs](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group_rule) | resource | | [aws_ssm_parameter.airflow_api_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | @@ -51,11 +50,9 @@ No modules. | [aws_ssm_parameter.airflow_ui_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.airflow_ui_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.unity_proxy_airflow_ui](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | -| [aws_vpc_security_group_ingress_rule.airflow_ingress_sg_jpl_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [aws_vpc_security_group_ingress_rule.airflow_ingress_sg_proxy_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [helm_release.airflow](https://registry.terraform.io/providers/hashicorp/helm/2.15.0/docs/resources/release) | resource | | [helm_release.keda](https://registry.terraform.io/providers/hashicorp/helm/2.15.0/docs/resources/release) | resource | -| [kubernetes_ingress_v1.airflow_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | [kubernetes_ingress_v1.airflow_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | [kubernetes_namespace.keda](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/namespace) | resource | | [kubernetes_persistent_volume.airflow_deployed_dags](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/persistent_volume) | resource | @@ -77,10 +74,12 @@ No modules. 
| [aws_lambda_functions.lambda_check_all](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/lambda_functions) | data source | | [aws_secretsmanager_secret_version.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/secretsmanager_secret_version) | data source | | [aws_security_groups.venue_proxy_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/security_groups) | data source | -| [aws_ssm_parameter.ssl_cert_arn](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_account](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | +| [aws_ssm_parameter.venue_proxy_baseurl](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source | | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source | -| [kubernetes_ingress_v1.airflow_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_ingress_v1.airflow_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source | | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source | @@ -109,5 +108,6 @@ No modules. |------|-------------| | [airflow\_deployed\_dags\_pvc](#output\_airflow\_deployed\_dags\_pvc) | n/a | | [airflow\_urls](#output\_airflow\_urls) | SSM parameter IDs and URLs for the various Airflow endpoints. | +| [airflow\_venue\_urls](#output\_airflow\_venue\_urls) | URLs for the various Airflow endpoints at venue-proxy level. | | [s3\_buckets](#output\_s3\_buckets) | SSM parameter IDs and bucket names for the various buckets used in the pipeline. 
| diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 2e136b5a..d93317ff 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -18,12 +18,13 @@ data "kubernetes_namespace" "service_area" { } } +/* Note: re-enable this to allow access via the JPL network data "kubernetes_ingress_v1" "airflow_ingress" { metadata { name = kubernetes_ingress_v1.airflow_ingress.metadata[0].name namespace = data.kubernetes_namespace.service_area.metadata[0].name } -} +}*/ data "kubernetes_ingress_v1" "airflow_ingress_internal" { metadata { @@ -44,6 +45,23 @@ data "aws_efs_file_system" "efs" { file_system_id = var.efs_file_system_id } +/* Note: re-enable this to allow access via the JPL network data "aws_ssm_parameter" "ssl_cert_arn" { name = "/unity/account/network/ssl" +}*/ + +data "aws_ssm_parameter" "shared_services_account" { + name = "/unity/shared-services/aws/account" +} + +data "aws_ssm_parameter" "shared_services_region" { + name = "/unity/shared-services/aws/account/region" +} + +data "aws_ssm_parameter" "shared_services_domain" { + name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain" +} + +data "aws_ssm_parameter" "venue_proxy_baseurl" { + name = "/unity/${var.project}/${var.venue}/management/httpd/loadbalancer-url" } diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index b9acf1b5..47732eba 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -430,6 +430,7 @@ resource "helm_release" "airflow" { ] } +/* Note: re-enable this to allow access via the JPL network resource "aws_security_group" "airflow_ingress_sg" { name = "${var.project}-${var.venue}-airflow-ingress-sg" description = "SecurityGroup for Airflow LoadBalancer ingress" @@ -439,7 +440,7 @@ resource "aws_security_group" "airflow_ingress_sg" { Component = "airflow" Stack = "airflow" }) -} +}*/ resource "aws_security_group" "airflow_ingress_sg_internal" { name = "${var.project}-${var.venue}-airflow-internal-ingress-sg" @@ -452,6 +453,7 @@ resource "aws_security_group" "airflow_ingress_sg_internal" { }) } +/* Note: re-enable this to allow access via the JPL network #tfsec:ignore:AVD-AWS-0107 resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_jpl_rule" { for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"]) @@ -461,7 +463,7 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_jpl_rule" { from_port = local.load_balancer_port to_port = local.load_balancer_port cidr_ipv4 = each.key -} +}*/ data "aws_security_groups" "venue_proxy_sg" { filter { @@ -484,6 +486,7 @@ resource "aws_vpc_security_group_ingress_rule" "airflow_ingress_sg_proxy_rule" { referenced_security_group_id = data.aws_security_groups.venue_proxy_sg.ids[0] } +/* Note: re-enable this to allow access via the JPL network resource "kubernetes_ingress_v1" "airflow_ingress" { metadata { name = "airflow-ingress" @@ -521,7 +524,7 @@ resource "kubernetes_ingress_v1" "airflow_ingress" { } wait_for_load_balancer = true depends_on = [helm_release.airflow] -} +}*/ resource "kubernetes_ingress_v1" "airflow_ingress_internal" { metadata { @@ -564,12 +567,13 @@ resource "aws_ssm_parameter" 
"airflow_ui_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "ui_url"]))) description = "The URL of the Airflow UI." type = "String" - value = "https://${data.kubernetes_ingress_v1.airflow_ingress.status[0].load_balancer[0].ingress[0].hostname}:5000" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_ui") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { @@ -578,8 +582,8 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow UI" - "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/health" - "landingPageUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000" + "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/health" + "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_ui") @@ -589,18 +593,20 @@ resource "aws_ssm_parameter" "airflow_ui_health_check_endpoint" { lifecycle { ignore_changes = [value] } + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_api_url" { name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "api_url"]))) description = "The URL of the Airflow REST API." 
type = "String" - value = "https://${data.kubernetes_ingress_v1.airflow_ingress.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1" + value = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "endpoints-airflow_api") Component = "SSM" Stack = "SSM" }) + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { @@ -609,8 +615,8 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { type = "String" value = jsonencode({ "componentName" : "Airflow API" - "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1/health" - "landingPageUrl" : "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/api/v1" + "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1/health" + "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/sps/api/v1" }) tags = merge(local.common_tags, { Name = format(local.resource_name_prefix, "health-check-endpoints-airflow_api") @@ -620,6 +626,7 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" { lifecycle { ignore_changes = [value] } + depends_on = [aws_ssm_parameter.unity_proxy_airflow_ui] } resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { @@ -635,7 +642,7 @@ resource "aws_ssm_parameter" "unity_proxy_airflow_ui" { Redirect "/${var.project}/${var.venue}/sps/home" - ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" + ProxyPassMatch "http://${data.kubernetes_ingress_v1.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5000/$1" retry=5 disablereuse=On ProxyPreserveHost On FallbackResource /management/index.html AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html @@ -653,8 +660,8 @@ EOT data "aws_lambda_functions" "lambda_check_all" {} resource "aws_lambda_invocation" "unity_proxy_lambda_invocation" { - count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "unity-${var.venue}-httpdproxymanagement") ? 1 : 0 - function_name = "unity-${var.venue}-httpdproxymanagement" + count = contains(data.aws_lambda_functions.lambda_check_all.function_names, "${var.project}-${var.venue}-httpdproxymanagement") ? 1 : 0 + function_name = "${var.project}-${var.venue}-httpdproxymanagement" input = "{}" triggers = { redeployment = sha1(jsonencode([ diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf b/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf index 654d726f..f7ed98c0 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/outputs.tf @@ -12,6 +12,18 @@ output "airflow_urls" { } } +output "airflow_venue_urls" { + description = "URLs for the various Airflow endpoints at venue-proxy level." 
+ value = { + "ui" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "sps/")) + } + "rest_api" = { + "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "sps/api/v1")) + } + } +} + output "s3_buckets" { description = "SSM parameter IDs and bucket names for the various buckets used in the pipeline." value = { diff --git a/terraform-unity/modules/terraform-unity-sps-eks/README.md b/terraform-unity/modules/terraform-unity-sps-eks/README.md index 24bc32aa..6761714a 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/README.md +++ b/terraform-unity/modules/terraform-unity-sps-eks/README.md @@ -18,7 +18,7 @@ | Name | Source | Version | |------|--------|---------| -| [unity-eks](#module\_unity-eks) | git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.2.0-hotfix | +| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.4.0 | ## Resources @@ -31,10 +31,13 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the cluster will be deployed (dev, test, prod) | `string` | n/a | yes | ## Outputs diff --git a/terraform-unity/modules/terraform-unity-sps-eks/main.tf b/terraform-unity/modules/terraform-unity-sps-eks/main.tf index 777fd649..20fae0fa 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/main.tf @@ -9,8 +9,10 @@ terraform { } module "unity-eks" { - source = "git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=unity-sps-2.2.0-hotfix" + source = "git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=unity-sps-2.4.0" deployment_name = local.cluster_name + project = var.project + venue = var.venue nodegroups = var.nodegroups aws_auth_roles = [{ rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/mcp-tenantOperator" diff --git a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf index 3b36d1c3..b8a4be46 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf @@ -21,6 +21,27 @@ variable "release" { default = "24.3" } +# tflint-ignore: terraform_unused_declarations +variable "deployment_name" { + description = "The name of the deployment." + type = string + default = "" +} + +# tflint-ignore: terraform_unused_declarations +variable "tags" { + description = "Tags for the deployment (unused)" + type = map(string) + default = { empty = "" } +} + +# tflint-ignore: terraform_unused_declarations +variable "installprefix" { + description = "The install prefix for the service area (unused)" + type = string + default = "" +} + variable "nodegroups" { description = "A map of node group configurations" type = map(object({ diff --git a/terraform-unity/modules/terraform-unity-sps-initiators/main.tf b/terraform-unity/modules/terraform-unity-sps-initiators/main.tf index 56893c70..0e9f6e8e 100644 --- a/terraform-unity/modules/terraform-unity-sps-initiators/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-initiators/main.tf @@ -78,7 +78,7 @@ resource "aws_s3_object" "router_config" { } module "unity_initiator" { - source = "git@github.com:unity-sds/unity-initiator.git//terraform-unity/initiator?ref=unity-sps-2.2.0" + source = "git::https://github.com/unity-sds/unity-initiator.git//terraform-unity/initiator?ref=unity-sps-2.2.0" code_bucket = aws_s3_bucket.code.id project = var.project router_config = "s3://${aws_s3_bucket.config.id}/${aws_s3_object.router_config.key}" @@ -91,7 +91,7 @@ resource "aws_s3_object" "isl_stacam_rawdp_folder" { } module "s3_bucket_notification" { - source = "git@github.com:unity-sds/unity-initiator.git//terraform-unity/triggers/s3-bucket-notification?ref=unity-sps-2.2.0" + source = "git::https://github.com/unity-sds/unity-initiator.git//terraform-unity/triggers/s3-bucket-notification?ref=unity-sps-2.2.0" initiator_topic_arn = module.unity_initiator.initiator_topic_arn isl_bucket = aws_s3_bucket.inbound_staging_location.id isl_bucket_prefix = "STACAM/RawDP/" diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md index c8d232be..5a58d601 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/README.md +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/README.md @@ -35,10 +35,13 @@ | Name | Description | Type | Default 
| Required | |------|-------------|------|---------|:--------:| +| [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"karpenter": {
"chart": "karpenter",
"repository": "oci://public.ecr.aws/karpenter",
"version": "1.0.2"
}
}
| no | +| [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.3"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | +| [tags](#input\_tags) | Tags for the deployment (unused) | `map(string)` |
{
"empty": ""
}
| no | | [venue](#input\_venue) | The MCP venue in which the cluster will be deployed (dev, test, prod) | `string` | n/a | yes | ## Outputs diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf index 84221fcd..e9008955 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf @@ -21,6 +21,27 @@ variable "release" { default = "24.3" } +# tflint-ignore: terraform_unused_declarations +variable "deployment_name" { + description = "The name of the deployment." + type = string + default = "" +} + +# tflint-ignore: terraform_unused_declarations +variable "tags" { + description = "Tags for the deployment (unused)" + type = map(string) + default = { empty = "" } +} + +# tflint-ignore: terraform_unused_declarations +variable "installprefix" { + description = "The install prefix for the service area (unused)" + type = string + default = "" +} + variable "helm_charts" { description = "Helm charts for the associated services." type = map(object({ diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md index 124e403f..775938a2 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/README.md @@ -23,17 +23,14 @@ No modules. | Name | Type | |------|------| | [aws_lambda_invocation.unity_proxy_lambda_invocation](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/lambda_invocation) | resource | -| [aws_security_group.ogc_ingress_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_security_group.ogc_ingress_sg_internal](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/security_group) | resource | | [aws_ssm_parameter.ogc_processes_api_health_check_endpoint](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.ogc_processes_api_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.ogc_processes_ui_url](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | | [aws_ssm_parameter.unity_proxy_ogc_api](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/ssm_parameter) | resource | -| [aws_vpc_security_group_ingress_rule.ogc_ingress_sg_jpl_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [aws_vpc_security_group_ingress_rule.ogc_ingress_sg_proxy_rule](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/vpc_security_group_ingress_rule) | resource | | [kubernetes_deployment.ogc_processes_api](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/deployment) | resource | | [kubernetes_deployment.redis](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/deployment) | resource | -| [kubernetes_ingress_v1.ogc_processes_api_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource | | 
[kubernetes_ingress_v1.ogc_processes_api_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/ingress_v1) | resource |
 | [kubernetes_service.ogc_processes_api](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/service) | resource |
 | [kubernetes_service.redis](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/resources/service) | resource |
@@ -42,10 +39,12 @@ No modules.
 | [aws_lambda_functions.lambda_check_all](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/lambda_functions) | data source |
 | [aws_secretsmanager_secret_version.db](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/secretsmanager_secret_version) | data source |
 | [aws_security_groups.venue_proxy_sg](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/security_groups) | data source |
-| [aws_ssm_parameter.ssl_cert_arn](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
+| [aws_ssm_parameter.shared_services_account](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
+| [aws_ssm_parameter.shared_services_domain](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
+| [aws_ssm_parameter.shared_services_region](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
 | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
+| [aws_ssm_parameter.venue_proxy_baseurl](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/ssm_parameter) | data source |
 | [aws_vpc.cluster_vpc](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/vpc) | data source |
-| [kubernetes_ingress_v1.ogc_processes_api_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source |
 | [kubernetes_ingress_v1.ogc_processes_api_ingress_internal](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/ingress_v1) | data source |
 | [kubernetes_namespace.service_area](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/namespace) | data source |
 | [kubernetes_persistent_volume_claim.airflow_deployed_dags](https://registry.terraform.io/providers/hashicorp/kubernetes/2.32.0/docs/data-sources/persistent_volume_claim) | data source |
@@ -73,4 +72,5 @@ No modules.
 
 | Name | Description |
 |------|-------------|
 | [ogc\_processes\_urls](#output\_ogc\_processes\_urls) | SSM parameter IDs and URLs for the various OGC Processes endpoints. |
+| [ogc\_processes\_venue\_urls](#output\_ogc\_processes\_venue\_urls) | URLs for the various OGC Processes endpoints at venue-proxy level. |
diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf
index d76a7e60..d29c4d91 100644
--- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf
+++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/data.tf
@@ -30,12 +30,13 @@ data "kubernetes_persistent_volume_claim" "airflow_deployed_dags" {
   }
 }
 
+/* Note: re-enable this to allow access via the JPL network
 data "kubernetes_ingress_v1" "ogc_processes_api_ingress" {
   metadata {
     name      = kubernetes_ingress_v1.ogc_processes_api_ingress.metadata[0].name
     namespace = data.kubernetes_namespace.service_area.metadata[0].name
   }
-}
+}*/
 
 data "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" {
   metadata {
@@ -44,6 +45,23 @@ data "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" {
   }
 }
 
+/* Note: re-enable this to allow access via the JPL network
 data "aws_ssm_parameter" "ssl_cert_arn" {
   name = "/unity/account/network/ssl"
-}
+}*/
+
+data "aws_ssm_parameter" "shared_services_account" {
+  name = "/unity/shared-services/aws/account"
+}
+
+data "aws_ssm_parameter" "shared_services_region" {
+  name = "/unity/shared-services/aws/account/region"
+}
+
+data "aws_ssm_parameter" "shared_services_domain" {
+  name = "arn:aws:ssm:${data.aws_ssm_parameter.shared_services_region.value}:${data.aws_ssm_parameter.shared_services_account.value}:parameter/unity/shared-services/domain"
+}
+
+data "aws_ssm_parameter" "venue_proxy_baseurl" {
+  name = "/unity/${var.project}/${var.venue}/management/httpd/loadbalancer-url"
+}
diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf
index 7803b985..120073fe 100644
--- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf
+++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf
@@ -206,6 +206,7 @@ resource "kubernetes_service" "ogc_processes_api" {
   }
 }
 
+/* Note: re-enable this to allow access via the JPL network
 resource "aws_security_group" "ogc_ingress_sg" {
   name        = "${var.project}-${var.venue}-ogc-ingress-sg"
   description = "SecurityGroup for OGC API LoadBalancer ingress"
@@ -215,7 +216,7 @@ resource "aws_security_group" "ogc_ingress_sg" {
     Component = "ogc"
     Stack     = "ogc"
   })
-}
+}*/
 
 resource "aws_security_group" "ogc_ingress_sg_internal" {
   name        = "${var.project}-${var.venue}-ogc-internal-ingress-sg"
@@ -228,6 +229,7 @@ resource "aws_security_group" "ogc_ingress_sg_internal" {
   })
 }
 
+/* Note: re-enable this to allow access via the JPL network
 #tfsec:ignore:AVD-AWS-0107
 resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_jpl_rule" {
   for_each = toset(["128.149.0.0/16", "137.78.0.0/16", "137.79.0.0/16"])
@@ -237,7 +239,7 @@ resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_jpl_rule" {
   from_port   = local.load_balancer_port
   to_port     = local.load_balancer_port
   cidr_ipv4   = each.key
-}
+}*/
 
 data "aws_security_groups" "venue_proxy_sg" {
   filter {
@@ -260,6 +262,7 @@ resource "aws_vpc_security_group_ingress_rule" "ogc_ingress_sg_proxy_rule" {
   referenced_security_group_id = data.aws_security_groups.venue_proxy_sg.ids[0]
 }
 
+/* Note: re-enable this to allow access via the JPL network
 resource "kubernetes_ingress_v1" "ogc_processes_api_ingress" {
   metadata {
     name      = "ogc-processes-api-ingress"
@@ -296,7 +299,7 @@ resource "kubernetes_ingress_v1" "ogc_processes_api_ingress" {
     }
   }
   wait_for_load_balancer = true
-}
+}*/
 
 resource "kubernetes_ingress_v1" "ogc_processes_api_ingress_internal" {
   metadata {
@@ -338,24 +341,26 @@ resource "aws_ssm_parameter" "ogc_processes_ui_url" {
   name        = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "ui_url"])))
   description = "The URL of the OGC Proccesses API Docs UI."
   type        = "String"
-  value       = "https://${data.kubernetes_ingress_v1.ogc_processes_api_ingress.status[0].load_balancer[0].ingress[0].hostname}:5001/redoc"
+  value       = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/redoc"
   tags = merge(local.common_tags, {
     Name      = format(local.resource_name_prefix, "endpoints-ogc_processes_ui")
     Component = "SSM"
     Stack     = "SSM"
   })
+  depends_on = [aws_ssm_parameter.unity_proxy_ogc_api]
 }
 
 resource "aws_ssm_parameter" "ogc_processes_api_url" {
   name        = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "ogc_processes", "api_url"])))
   description = "The URL of the OGC Processes REST API."
   type        = "String"
-  value       = "https://${data.kubernetes_ingress_v1.ogc_processes_api_ingress.status[0].load_balancer[0].ingress[0].hostname}:5001"
+  value       = "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/"
   tags = merge(local.common_tags, {
     Name      = format(local.resource_name_prefix, "endpoints-ogc_processes_api")
     Component = "SSM"
     Stack     = "SSM"
   })
+  depends_on = [aws_ssm_parameter.unity_proxy_ogc_api]
 }
 
 resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" {
@@ -364,8 +369,8 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" {
   type        = "String"
   value = jsonencode({
     "componentName" : "OGC API"
-    "healthCheckUrl" : "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/health"
-    "landingPageUrl" : "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001"
+    "healthCheckUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/health"
+    "landingPageUrl" : "https://www.${data.aws_ssm_parameter.shared_services_domain.value}:4443/${var.project}/${var.venue}/ogc/"
   })
   tags = merge(local.common_tags, {
     Name      = format(local.resource_name_prefix, "health-check-endpoints-ogc_processes_api")
@@ -375,6 +380,7 @@ resource "aws_ssm_parameter" "ogc_processes_api_health_check_endpoint" {
   lifecycle {
     ignore_changes = [value]
   }
+  depends_on = [aws_ssm_parameter.unity_proxy_ogc_api]
 }
 
 resource "aws_ssm_parameter" "unity_proxy_ogc_api" {
@@ -387,7 +393,7 @@ resource "aws_ssm_parameter" "unity_proxy_ogc_api" {
 
       ProxyPassReverse "/"
 
-      ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1"
+      ProxyPassMatch "http://${data.kubernetes_ingress_v1.ogc_processes_api_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:5001/$1" retry=5 disablereuse=On
       ProxyPreserveHost On
       FallbackResource /management/index.html
       AddOutputFilterByType INFLATE;SUBSTITUTE;DEFLATE text/html
@@ -405,8 +411,8 @@ EOT
 data "aws_lambda_functions" "lambda_check_all" {}
 
 resource "aws_lambda_invocation" "unity_proxy_lambda_invocation" {
-  count         = contains(data.aws_lambda_functions.lambda_check_all.function_names, "unity-${var.venue}-httpdproxymanagement") ? 1 : 0
-  function_name = "unity-${var.venue}-httpdproxymanagement"
+  count         = contains(data.aws_lambda_functions.lambda_check_all.function_names, "${var.project}-${var.venue}-httpdproxymanagement") ? 1 : 0
+  function_name = "${var.project}-${var.venue}-httpdproxymanagement"
   input = "{}"
   triggers = {
     redeployment = sha1(jsonencode([
diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf
index a8302464..e4e892d0 100644
--- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf
+++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/outputs.tf
@@ -11,3 +11,15 @@ output "ogc_processes_urls" {
     }
   }
 }
+
+output "ogc_processes_venue_urls" {
+  description = "URLs for the various OGC Processes endpoints at venue-proxy level."
+  value = {
+    "ui" = {
+      "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "ogc/redoc"))
+    }
+    "rest_api" = {
+      "url" = nonsensitive(replace(data.aws_ssm_parameter.venue_proxy_baseurl.value, "management/ui", "ogc/"))
+    }
+  }
+}
diff --git a/terraform-unity/outputs.tf b/terraform-unity/outputs.tf
index 4d1eeb00..b0e84a8d 100644
--- a/terraform-unity/outputs.tf
+++ b/terraform-unity/outputs.tf
@@ -5,6 +5,10 @@ output "resources" {
       "airflow"       = module.unity-sps-airflow.airflow_urls
       "ogc_processes" = module.unity-sps-ogc-processes-api.ogc_processes_urls
     }
+    "venue_endpoints" = {
+      "airflow"       = module.unity-sps-airflow.airflow_venue_urls
+      "ogc_processes" = module.unity-sps-ogc-processes-api.ogc_processes_venue_urls
+    }
     "buckets" = module.unity-sps-airflow.s3_buckets
   }
 }
diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf
index dfe3d4e5..d0843269 100644
--- a/terraform-unity/variables.tf
+++ b/terraform-unity/variables.tf
@@ -338,3 +338,24 @@ variable "dag_catalog_repo" {
     dags_directory_path = "airflow/dags"
   }
 }
+
+# tflint-ignore: terraform_unused_declarations
+variable "deployment_name" {
+  description = "The name of the deployment."
+  type        = string
+  default     = ""
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "tags" {
+  description = "Tags for the deployment (unused)"
+  type        = map(string)
+  default     = { empty = "" }
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "installprefix" {
+  description = "The install prefix for the service area (unused)"
+  type        = string
+  default     = ""
+}