diff --git a/terraform-modules/terraform-unity-sps-airflow/.terraform.lock.hcl b/terraform-modules/terraform-unity-sps-airflow/.terraform.lock.hcl
new file mode 100644
index 0000000..b6626af
--- /dev/null
+++ b/terraform-modules/terraform-unity-sps-airflow/.terraform.lock.hcl
@@ -0,0 +1,22 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/kubernetes" {
+  version     = "2.19.0"
+  constraints = "2.19.0"
+  hashes = [
+    "h1:ID/u9YOv00w+Z8iG+592oyuV7HcqRmPiZpEC9hnyTMY=",
+    "zh:028d346460de2d1d19b4c863dfc36be51c7bcd97d372b54a3a946bcb19f3f613",
+    "zh:391d0b38c455437d0a2ab1beb6ce6e1230aa4160bbae11c58b2810b258b44280",
+    "zh:40ea742f91b67f66e71d7091cfd40cc604528c4947651924bd6d8bd8d9793708",
+    "zh:48a99d341c8ba3cadaafa7cb99c0f11999f5e23f5cfb0f8469b4e352d9116e74",
+    "zh:4a5ade940eff267cbf7dcd52c1a7ac3999e7cc24996a409bd8b37bdb48a97f02",
+    "zh:5063742016a8249a4be057b9cc0ef24a684ec76d0ae5463d4b07e9b2d21e047e",
+    "zh:5d36b3a5662f840a6788f5e2a19d02139e87318feb3c5d82c7d076be1366fec4",
+    "zh:75edd9960cb30e54ef7de1b7df2761a274f17d4d41f54e72f86b43f41af3eb6d",
+    "zh:b85cadef3e6f25f1a10a617472bf5e8449decd61626733a1bc723de5edc08f64",
+    "zh:dc565b17b4ea6dde6bd1b92bc37e5e850fcbf9400540eec00ad3d9552a76ac2e",
+    "zh:deb665cc2123f2701aa3d653987b2ca35fb035a08a76a2382efb215c209f19a5",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+  ]
+}
diff --git a/terraform-modules/terraform-unity-sps-airflow/main.tf b/terraform-modules/terraform-unity-sps-airflow/main.tf
new file mode 100644
index 0000000..3e07012
--- /dev/null
+++ b/terraform-modules/terraform-unity-sps-airflow/main.tf
@@ -0,0 +1,834 @@
+# Kubernetes provider configured from the kubeconfig file at var.kubeconfig_filepath.
+provider "kubernetes" {
+  config_path = var.kubeconfig_filepath
+  # NOTE(review): insecure = true skips TLS verification of the API server
+  # certificate; confirm this is intended outside of development clusters.
+  insecure = true
+}
+
+
+# The namespace resource below is disabled, so the namespace named by
+# var.namespace must already exist in the cluster.
+# resource "kubernetes_namespace" "unity-sps" {
+#   metadata {
+#     name = var.namespace
+#   }
+# }
+
+# hostPath-backed persistent volume and claim for the Airflow home directory.
+resource "kubernetes_persistent_volume" "airflow_home_pv" {
+  metadata {
+    name = "airflow-home-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.airflow_home
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "airflow_home_pvc" {
+  metadata {
+    name      = "airflow-home-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.airflow_home_pv.metadata[0].name
+  }
+}
+
+# hostPath-backed persistent volume and claim for the CWL temp directory.
+resource "kubernetes_persistent_volume" "cwl_tmp_pv" {
+  metadata {
+    name = "cwl-tmp-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.cwl_tmp_folder
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "cwl_tmp_pvc" {
+  metadata {
+    name      = "cwl-tmp-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.cwl_tmp_pv.metadata[0].name
+  }
+}
+
+# hostPath-backed persistent volume and claim for the CWL inputs directory.
+resource "kubernetes_persistent_volume" "cwl_inputs_pv" {
+  metadata {
+    name = "cwl-inputs-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.cwl_inputs_folder
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "cwl_inputs_pvc" {
+  metadata {
+    name      = "cwl-inputs-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.cwl_inputs_pv.metadata[0].name
+  }
+}
+
+# hostPath-backed persistent volume and claim for the CWL outputs directory.
+resource "kubernetes_persistent_volume" "cwl_outputs_pv" {
+  metadata {
+    name = "cwl-outputs-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.cwl_outputs_folder
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "cwl_outputs_pvc" {
+  metadata {
+    name      = "cwl-outputs-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.cwl_outputs_pv.metadata[0].name
+  }
+}
+
+# hostPath-backed persistent volume and claim for the CWL pickle directory.
+resource "kubernetes_persistent_volume" "cwl_pickle_pv" {
+  metadata {
+    name = "cwl-pickle-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.cwl_pickle_folder
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "cwl_pickle_pvc" {
+  metadata {
+    name      = "cwl-pickle-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.cwl_pickle_pv.metadata[0].name
+  }
+}
+
+
+# Scheduler deployment: a privileged Docker-in-Docker container plus the scheduler
+# container, which share the Docker socket directory through an emptyDir volume.
+resource "kubernetes_deployment" "scheduler" {
+  metadata {
+    name      = "scheduler"
+    namespace = var.namespace
+    labels = {
+      app = "scheduler"
+    }
+  }
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        app = "scheduler"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          app = "scheduler"
+        }
+      }
+      spec {
+        node_selector = {
+          "eks.amazonaws.com/nodegroup" = "unity-dev-sps-EKS-VerdiNodeGroup"
+        }
+        container {
+          name              = "dind-daemon"
+          image             = var.docker_images.dind
+          image_pull_policy = "Always"
+          env {
+            name  = "DOCKER_TLS_CERTDIR"
+            value = ""
+          }
+          resources {
+            requests = {
+              cpu    = "20m"
+              memory = "512Mi"
+            }
+          }
+          security_context {
+            privileged = true
+          }
+          args = ["--tls=false"]
+          lifecycle {
+            post_start {
+              exec {
+                # Note: must wait a few seconds for the Docker engine to start and the file to be created
+                # Absolute shell path so the hook does not depend on the container's working directory.
+                command = [
+                  "/bin/sh",
+                  "-c",
+                  <<-EOT
+                  sleep 5 && \
+                  chmod 777 /var/run/docker.sock
+                  EOT
+                ]
+              }
+            }
+          }
+          # Empty directory where the Docker engine stores its images
+          volume_mount {
+            name       = "docker-graph-storage"
+            mount_path = "/var/lib/docker"
+          }
+          # The Docker socket must be shared with the scheduler container
+          volume_mount {
+            name       = "docker-sock-dir"
+            mount_path = "/var/run"
+            sub_path   = "docker.sock"
+          }
+        }
+        container {
+          image             = var.docker_images.airflow_cwl
+          image_pull_policy = "IfNotPresent"
+          name              = "scheduler"
+          args              = ["start_scheduler.sh"]
+          security_context {
+            privileged = true
+          }
+          volume_mount {
+            mount_path = var.airflow_home
+            name       = "airflow-home"
+          }
+          volume_mount {
+            mount_path = var.cwl_tmp_folder
+            name       = "cwl-tmp"
+          }
+          volume_mount {
+            mount_path = var.cwl_inputs_folder
+            name       = "cwl-inputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_outputs_folder
+            name       = "cwl-outputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_pickle_folder
+            name       = "cwl-pickle"
+          }
+          volume_mount {
+            name       = "docker-sock-dir"
+            mount_path = "/var/run"
+            sub_path   = "docker.sock"
+          }
+          env {
+            name  = "AIRFLOW_HOME"
+            value = var.airflow_home
+          }
+          env {
+            name  = "PROCESS_REPORT_URL"
+            value = var.process_report_url
+          }
+          env {
+            name  = "AIRFLOW__CORE__EXECUTOR"
+            value = "LocalExecutor"
+          }
+          # The port follows the MySQL service port so service_port_map overrides stay consistent.
+          env {
+            name  = "AIRFLOW__CORE__SQL_ALCHEMY_CONN"
+            value = "mysql://${var.mysql_user}:${var.mysql_password}@mysql:${var.service_port_map.mysql_service}/${var.mysql_database}"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAGS_FOLDER"
+            value = "${var.airflow_home}/dags"
+          }
+          env {
+            name  = "AIRFLOW__CORE__BASE_LOG_FOLDER"
+            value = "${var.airflow_home}/logs"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION"
+            value = "${var.airflow_home}/logs/dag_processor_manager/dag_processor_manager.log"
+          }
+          env {
+            name  = "AIRFLOW__CORE__PLUGINS_FOLDER"
+            value = "${var.airflow_home}/plugins"
+          }
+          env {
+            name  = "AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY"
+            value = "${var.airflow_home}/logs/scheduler"
+          }
+        }
+        volume {
+          name = "airflow-home"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.airflow_home_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-tmp"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_tmp_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-inputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_inputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-outputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_outputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-pickle"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_pickle_pvc.metadata[0].name
+          }
+        }
+        # Shared directory holding the Docker socket
+        volume {
+          name = "docker-sock-dir"
+          empty_dir {}
+        }
+        # Clean Docker storage
+        volume {
+          name = "docker-graph-storage"
+          empty_dir {}
+        }
+        restart_policy = "Always"
+      }
+    }
+  }
+  depends_on = [kubernetes_service.mysql]
+}
+
+# hostPath-backed persistent volume and claim for the MySQL data directory.
+resource "kubernetes_persistent_volume" "mysql_data_pv" {
+  metadata {
+    name = "mysql-data-pv"
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    capacity = {
+      storage = "10Gi"
+    }
+    access_modes = ["ReadWriteOnce"]
+    persistent_volume_source {
+      host_path {
+        path = var.mysql_data_folder
+      }
+    }
+  }
+}
+
+resource "kubernetes_persistent_volume_claim" "mysql_data_pvc" {
+  metadata {
+    name      = "mysql-data-pvc"
+    namespace = var.namespace
+  }
+  spec {
+    storage_class_name = var.storage_class_name
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "10Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.mysql_data_pv.metadata[0].name
+  }
+}
+
+
+# MySQL deployment; its data directory is mounted from the mysql-data claim.
+resource "kubernetes_deployment" "mysql" {
+  metadata {
+    name      = "mysql"
+    namespace = var.namespace
+    labels = {
+      app = "mysql"
+    }
+  }
+
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        app = "mysql"
+      }
+    }
+
+    template {
+      metadata {
+        labels = {
+          app = "mysql"
+        }
+      }
+
+      spec {
+        node_selector = {
+          "eks.amazonaws.com/nodegroup" = "unity-dev-sps-EKS-VerdiNodeGroup"
+        }
+        container {
+          image = var.docker_images.mysql
+          name  = "mysql"
+          args  = ["--explicit-defaults-for-timestamp=1"]
+
+          env {
+            name  = "MYSQL_ROOT_PASSWORD"
+            value = var.mysql_root_password
+          }
+          env {
+            name  = "MYSQL_DATABASE"
+            value = var.mysql_database
+          }
+          env {
+            name  = "MYSQL_USER"
+            value = var.mysql_user
+          }
+          env {
+            name  = "MYSQL_PASSWORD"
+            value = var.mysql_password
+          }
+
+          volume_mount {
+            mount_path = "/var/lib/mysql"
+            name       = "mysql-data"
+          }
+        }
+
+        volume {
+          name = "mysql-data"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.mysql_data_pvc.metadata[0].name
+          }
+        }
+      }
+    }
+  }
+}
+
+# ClusterIP service that exposes the MySQL pods inside the cluster as "mysql".
+resource "kubernetes_service" "mysql" {
+  metadata {
+    name      = "mysql"
+    namespace = var.namespace
+  }
+  spec {
+    selector = {
+      app = "mysql"
+    }
+    port {
+      port = var.service_port_map.mysql_service
+      # The mysql container is started without a port override, so target the
+      # MySQL default port even when the service port is remapped.
+      target_port = 3306
+    }
+
+    type = "ClusterIP"
+  }
+}
+
+
+# Webserver deployment; created after the MySQL service and the scheduler deployment.
+resource "kubernetes_deployment" "webserver" {
+  metadata {
+    name      = "webserver"
+    namespace = var.namespace
+    labels = {
+      app = "webserver"
+    }
+  }
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        app = "webserver"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          app = "webserver"
+        }
+      }
+      spec {
+        node_selector = {
+          "eks.amazonaws.com/nodegroup" = "unity-dev-sps-EKS-VerdiNodeGroup"
+        }
+        container {
+          image             = var.docker_images.airflow_cwl
+          image_pull_policy = "IfNotPresent"
+          name              = "webserver"
+          args              = ["start_webserver.sh"]
+          security_context {
+            privileged = true
+          }
+          volume_mount {
+            mount_path = var.airflow_home
+            name       = "airflow-home"
+          }
+          volume_mount {
+            mount_path = var.cwl_tmp_folder
+            name       = "cwl-tmp"
+          }
+          volume_mount {
+            mount_path = var.cwl_inputs_folder
+            name       = "cwl-inputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_outputs_folder
+            name       = "cwl-outputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_pickle_folder
+            name       = "cwl-pickle"
+          }
+          env {
+            name  = "AIRFLOW_HOME"
+            value = var.airflow_home
+          }
+          env {
+            name  = "PROCESS_REPORT_URL"
+            value = var.process_report_url
+          }
+          env {
+            name  = "AIRFLOW__CORE__EXECUTOR"
+            value = "LocalExecutor"
+          }
+          # The port follows the MySQL service port so service_port_map overrides stay consistent.
+          env {
+            name  = "AIRFLOW__CORE__SQL_ALCHEMY_CONN"
+            value = "mysql://${var.mysql_user}:${var.mysql_password}@mysql:${var.service_port_map.mysql_service}/${var.mysql_database}"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAGS_FOLDER"
+            value = "${var.airflow_home}/dags"
+          }
+          env {
+            name  = "AIRFLOW__CORE__BASE_LOG_FOLDER"
+            value = "${var.airflow_home}/logs"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION"
+            value = "${var.airflow_home}/logs/dag_processor_manager/dag_processor_manager.log"
+          }
+          env {
+            name  = "AIRFLOW__CORE__PLUGINS_FOLDER"
+            value = "${var.airflow_home}/plugins"
+          }
+          env {
+            name  = "AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY"
+            value = "${var.airflow_home}/logs/scheduler"
+          }
+        }
+        volume {
+          name = "airflow-home"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.airflow_home_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-tmp"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_tmp_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-inputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_inputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-outputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_outputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-pickle"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_pickle_pvc.metadata[0].name
+          }
+        }
+        restart_policy = "Always"
+      }
+    }
+  }
+  depends_on = [kubernetes_service.mysql, kubernetes_deployment.scheduler]
+}
+
+# Service for the webserver pods (container port 8080); type and subnets come from variables.
+resource "kubernetes_service" "webserver" {
+  metadata {
+    name      = "webserver"
+    namespace = var.namespace
+    annotations = {
+      "service.beta.kubernetes.io/aws-load-balancer-subnets" = var.elb_subnets
+    }
+  }
+  spec {
+    selector = {
+      app = "webserver"
+    }
+    type = var.service_type
+    port {
+      protocol    = "TCP"
+      port        = var.service_port_map.airflow_cwl_webserver_service
+      target_port = 8080
+    }
+    # port {
+    #   port        = 8080
+    #   target_port = 8080
+    #   node_port   = 30080
+    # }
+    # type = "NodePort"
+  }
+}
+
+
+# API server deployment; created after the MySQL service and the scheduler deployment.
+resource "kubernetes_deployment" "apiserver" {
+  metadata {
+    name      = "apiserver"
+    namespace = var.namespace
+    labels = {
+      app = "apiserver"
+    }
+  }
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        app = "apiserver"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          app = "apiserver"
+        }
+      }
+      spec {
+        node_selector = {
+          "eks.amazonaws.com/nodegroup" = "unity-dev-sps-EKS-VerdiNodeGroup"
+        }
+        container {
+          image             = var.docker_images.airflow_cwl
+          image_pull_policy = "IfNotPresent"
+          name              = "apiserver"
+          args              = ["start_apiserver.sh", "--replay", "60", "--host", "0.0.0.0"]
+          security_context {
+            privileged = true
+          }
+          volume_mount {
+            mount_path = var.airflow_home
+            name       = "airflow-home"
+          }
+          volume_mount {
+            mount_path = var.cwl_tmp_folder
+            name       = "cwl-tmp"
+          }
+          volume_mount {
+            mount_path = var.cwl_inputs_folder
+            name       = "cwl-inputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_outputs_folder
+            name       = "cwl-outputs"
+          }
+          volume_mount {
+            mount_path = var.cwl_pickle_folder
+            name       = "cwl-pickle"
+          }
+          env {
+            name  = "AIRFLOW_HOME"
+            value = var.airflow_home
+          }
+          env {
+            name  = "PROCESS_REPORT_URL"
+            value = var.process_report_url
+          }
+          env {
+            name  = "AIRFLOW__CORE__EXECUTOR"
+            value = "LocalExecutor"
+          }
+          # The port follows the MySQL service port so service_port_map overrides stay consistent.
+          env {
+            name  = "AIRFLOW__CORE__SQL_ALCHEMY_CONN"
+            value = "mysql://${var.mysql_user}:${var.mysql_password}@mysql:${var.service_port_map.mysql_service}/${var.mysql_database}"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAGS_FOLDER"
+            value = "${var.airflow_home}/dags"
+          }
+          env {
+            name  = "AIRFLOW__CORE__BASE_LOG_FOLDER"
+            value = "${var.airflow_home}/logs"
+          }
+          env {
+            name  = "AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION"
+            value = "${var.airflow_home}/logs/dag_processor_manager/dag_processor_manager.log"
+          }
+          env {
+            name  = "AIRFLOW__CORE__PLUGINS_FOLDER"
+            value = "${var.airflow_home}/plugins"
+          }
+          env {
+            name  = "AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY"
+            value = "${var.airflow_home}/logs/scheduler"
+          }
+        }
+        volume {
+          name = "airflow-home"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.airflow_home_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-tmp"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_tmp_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-inputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_inputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-outputs"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_outputs_pvc.metadata[0].name
+          }
+        }
+        volume {
+          name = "cwl-pickle"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.cwl_pickle_pvc.metadata[0].name
+          }
+        }
+        restart_policy = "Always"
+      }
+    }
+  }
+  depends_on = [kubernetes_service.mysql, kubernetes_deployment.scheduler]
+}
+
+# Service for the apiserver pods (container port 8081); type and subnets come from variables.
+resource "kubernetes_service" "apiserver" {
+  metadata {
+    name      = "apiserver"
+    namespace = var.namespace
+    annotations = {
+      "service.beta.kubernetes.io/aws-load-balancer-subnets" = var.elb_subnets
+    }
+  }
+  spec {
+    selector = {
+      app = "apiserver"
+    }
+    # type = "NodePort"
+    # port {
+    #   port        = 8081
+    #   target_port = 8081
+    #   node_port   = 30081
+    # }
+    type = var.service_type
+    port {
+      protocol    = "TCP"
+      port        = var.service_port_map.airflow_cwl_apiserver_service
+      target_port = 8081
+    }
+  }
+}
diff --git a/terraform-modules/terraform-unity-sps-airflow/variables.tf
b/terraform-modules/terraform-unity-sps-airflow/variables.tf
new file mode 100644
index 0000000..6da3d4e
--- /dev/null
+++ b/terraform-modules/terraform-unity-sps-airflow/variables.tf
@@ -0,0 +1,103 @@
+variable "kubeconfig_filepath" {
+  description = "The file path of the kubeconfig file"
+  type        = string
+}
+
+variable "namespace" {
+  description = "The Kubernetes namespace to create resources in"
+  type        = string
+}
+
+variable "airflow_home" {
+  description = "The Airflow home directory"
+  type        = string
+}
+
+variable "storage_class_name" {
+  description = "The Kubernetes storage class."
+  type        = string
+  default     = "gp2"
+}
+
+variable "cwl_tmp_folder" {
+  description = "The CWL temp directory"
+  type        = string
+}
+
+variable "cwl_inputs_folder" {
+  description = "The CWL inputs directory"
+  type        = string
+}
+
+variable "cwl_outputs_folder" {
+  description = "The CWL outputs directory"
+  type        = string
+}
+
+variable "cwl_pickle_folder" {
+  description = "The CWL pickle directory"
+  type        = string
+}
+
+variable "process_report_url" {
+  description = "The process report URL for Airflow"
+  type        = string
+}
+
+variable "mysql_user" {
+  description = "The MySQL user"
+  type        = string
+}
+
+variable "mysql_password" {
+  description = "The MySQL password"
+  type        = string
+  sensitive   = true
+}
+
+variable "mysql_database" {
+  description = "The name of the MySQL database used by Airflow"
+  type        = string
+}
+
+variable "mysql_data_folder" {
+  description = "The host path backing the MySQL data persistent volume"
+  type        = string
+}
+
+variable "mysql_root_password" {
+  description = "The MySQL root password"
+  type        = string
+  sensitive   = true
+}
+
+variable "service_type" {
+  description = "The Kubernetes service type for the webserver and apiserver services"
+  type        = string
+  default     = "LoadBalancer"
+}
+
+variable "service_port_map" {
+  description = "The ports exposed by the MySQL, webserver and apiserver services"
+  type        = map(number)
+  default = {
+    "mysql_service"                 = 3306
+    "airflow_cwl_webserver_service" = 8080
+    "airflow_cwl_apiserver_service" = 8081
+  }
+}
+
+variable "elb_subnets" {
+  description = "The value of the aws-load-balancer-subnets annotation on the webserver and apiserver services"
+  type        = string
+}
+
+variable "docker_images" {
+  description = "Docker images for the Unity SPS containers"
+  type        = map(string)
+  default = {
+    dind        = "docker:23.0.3-dind"
+    mysql       = "biarms/mysql:5.7"
+    airflow_cwl = "ghcr.io/unity-sds/unity-sps-prototype/sps-cwl-airflow:dev"
+  }
+}
diff --git a/terraform-modules/terraform-unity-sps-airflow/versions.tf b/terraform-modules/terraform-unity-sps-airflow/versions.tf
new file mode 100644
index 0000000..b790faa
--- /dev/null
+++ b/terraform-modules/terraform-unity-sps-airflow/versions.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 1.3.6"
+  required_providers {
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "2.19.0"
+    }
+  }
+}