From 95def33db96c55a640fba4df5bdfbcc3a179d8ac Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Mon, 8 Jan 2024 10:28:42 -0500 Subject: [PATCH] feat!: Init operator (#154) * operator module * pass license * Update main.tf * fix: Allow for inbound-cidrs for the ALB * Disabling otel per DD potential Conflict * Revert "fix: Allow for inbound-cidrs for the ALB" This reverts commit 3cb558b0f481ebef424fedb07861d5744ee34c83. * fixing a bug, can't quote the list of CIDRs * fxing MR mistake * Update main.tf * Add extra envs * testing tf change * removing test tf change * adding efs-csi-driver for weave * adding efs-csi-driver policy for weave * adding efs-csi-driver policy for weave and fmting * namespacing fix * fixing arn * updating policy * updates for weave EFS storage class * remove debug block * fix a provider error * removing name * updating sg name * refactor to app-eks * adding aws_security_group_rule * adding aws_security_group_rule * adding aws_security_group_rule * weird spacing issue * weird spacing issue * Fix bucket kms key arn for external buckets * fixing the username->user typo --------- Co-authored-by: Zachary Blasczyk Co-authored-by: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> --- main.tf | 130 ++++++++++--------- modules/app_eks/efs.tf | 41 ++++++ modules/app_eks/external_dns/external_dns.tf | 4 +- modules/app_eks/external_dns/variables.tf | 2 +- modules/app_eks/iam-policy-docs.tf | 2 +- modules/app_eks/iam-role-attachments.tf | 5 + modules/app_eks/lb_controller/controller.tf | 2 +- modules/app_eks/lb_controller/variables.tf | 2 +- modules/app_eks/main.tf | 18 +-- modules/app_eks/outputs.tf | 13 +- modules/file_storage/outputs.tf | 2 +- modules/networking/main.tf | 4 +- outputs.tf | 1 - variables.tf | 13 +- 14 files changed, 155 insertions(+), 84 deletions(-) create mode 100644 modules/app_eks/efs.tf diff --git a/main.tf b/main.tf index 4cb49865..6729136a 100644 --- a/main.tf +++ b/main.tf @@ -183,62 +183,74 @@ module "redis" { kms_key_arn = local.kms_key_arn } -# Comming soon! -# module "wandb" { -# source = "wandb/wandb/helm" -# version = "1.2.0" - -# depends_on = [ -# module.database, -# module.app_eks, -# module.redis, -# ] - -# operator_chart_version = "1.1.0" -# controller_image_tag = "1.10.1" - -# spec = { -# values = { -# global = { -# host = local.url -# license = var.license - -# bucket = { -# provider = "s3" -# name = local.bucket_name -# region = data.aws_s3_bucket.file_storage.region -# kmsKey = local.kms_key_arn -# } - -# mysql = { -# host = module.database.endpoint -# password = module.database.password -# username = module.database.username -# database = module.database.database_name -# port = module.database.port -# } - -# redis = { -# host = module.redis.0.host -# port = "${module.redis.0.port}?tls=true" -# } -# } - -# ingress = { -# class = "alb" - -# annotations = { -# "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" -# "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" -# "alb.ingress.kubernetes.io/scheme" = "internet-facing" -# "alb.ingress.kubernetes.io/target-type" = "ip" -# "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" -# "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn -# } -# } - -# mysql = { install = false } -# redis = { install = false } -# } -# } -# } +module "wandb" { + source = "wandb/wandb/helm" + version = "1.2.0" + + depends_on = [ + module.database, + module.app_eks, + module.redis, + ] + operator_chart_version = "1.1.0" + controller_image_tag = "1.10.1" + + spec = { + values = { + global = { + host = local.url + license = var.license + + extraEnv = var.other_wandb_env + + bucket = { + provider = "s3" + name = local.bucket_name + region = data.aws_s3_bucket.file_storage.region + kmsKey = local.use_external_bucket ? var.bucket_kms_key_arn : local.kms_key_arn + } + + mysql = { + host = module.database.endpoint + password = module.database.password + user = module.database.username + database = module.database.database_name + port = module.database.port + } + + redis = { + host = module.redis.0.host + port = "${module.redis.0.port}?tls=true" + } + } + + ingress = { + class = "alb" + + annotations = { + "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" + "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF + ${join("\\,", var.allowed_inbound_cidr)} + EOF + "alb.ingress.kubernetes.io/scheme" = "internet-facing" + "alb.ingress.kubernetes.io/target-type" = "ip" + "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" + "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn + } + } + + mysql = { install = false } + redis = { install = false } + + weave = { + persistence = { + provider = "efs" + efs = { + fileSystemId = module.app_eks.efs_id + } + + } + } + } + } +} diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf new file mode 100644 index 00000000..1cfcbe47 --- /dev/null +++ b/modules/app_eks/efs.tf @@ -0,0 +1,41 @@ +resource "random_pet" "efs" { + length = 2 +} + +resource "aws_efs_file_system" "storage_class" { + creation_token = "${var.namespace}-${random_pet.efs.id}" + encrypted = true + performance_mode = "generalPurpose" + throughput_mode = "elastic" +} + +resource "aws_efs_backup_policy" "storage_class" { + file_system_id = aws_efs_file_system.storage_class.id + + backup_policy { + status = "DISABLED" + } +} + +resource "aws_security_group" "storage_class_nfs" { + name = "${var.namespace}-${random_pet.efs.id}" + description = "Security group for NFS traffic" + vpc_id = var.network_id +} + +resource "aws_security_group_rule" "nfs_ingress" { + description = "NFS inbound" + type = "ingress" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + security_group_id = aws_security_group.storage_class_nfs.id + source_security_group_id = aws_security_group.primary_workers.id +} + +resource "aws_efs_mount_target" "storage_class" { + for_each = { for subnet in var.network_private_subnets : subnet => subnet } + file_system_id = aws_efs_file_system.storage_class.id + subnet_id = each.value + security_groups = [aws_security_group.storage_class_nfs.id] +} diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index 15f826ae..f71aefd3 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -21,7 +21,7 @@ resource "helm_release" "external_dns" { } set { - name = "domainFilters[0]" + name = "domainFilters[0]" value = var.fqdn } @@ -29,4 +29,4 @@ resource "helm_release" "external_dns" { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn } -} \ No newline at end of file +} diff --git a/modules/app_eks/external_dns/variables.tf b/modules/app_eks/external_dns/variables.tf index 4e33cb7f..0626c3d2 100644 --- a/modules/app_eks/external_dns/variables.tf +++ b/modules/app_eks/external_dns/variables.tf @@ -1,5 +1,5 @@ variable "namespace" { - type = string + type = string } variable "oidc_provider" { diff --git a/modules/app_eks/iam-policy-docs.tf b/modules/app_eks/iam-policy-docs.tf index 5399aef0..4e7f27b4 100644 --- a/modules/app_eks/iam-policy-docs.tf +++ b/modules/app_eks/iam-policy-docs.tf @@ -68,7 +68,7 @@ data "aws_iam_policy_document" "secrets_manager" { "secretsmanager:GetSecretValue", "secretsmanager:DeleteSecretVersion" ] - effect = "Allow" + effect = "Allow" resources = ["arn:aws:secretsmanager:*:${data.aws_caller_identity.current.account_id}:secret:${var.namespace}*"] } } diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index 938ad34b..e82fe63b 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -28,6 +28,11 @@ resource "aws_iam_role_policy_attachment" "eks_cni" { policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" } +resource "aws_iam_role_policy_attachment" "eks_efs" { + role = aws_iam_role.node.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy" +} + resource "aws_iam_role_policy_attachment" "eks_worker_node" { role = aws_iam_role.node.name policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" diff --git a/modules/app_eks/lb_controller/controller.tf b/modules/app_eks/lb_controller/controller.tf index e6a572f0..eff2042c 100644 --- a/modules/app_eks/lb_controller/controller.tf +++ b/modules/app_eks/lb_controller/controller.tf @@ -3,7 +3,7 @@ resource "helm_release" "aws_load_balancer_controller" { repository = "https://aws.github.io/eks-charts" chart = "aws-load-balancer-controller" namespace = "kube-system" - version = "1.6.1" + version = "1.6.2" set { name = "clusterName" diff --git a/modules/app_eks/lb_controller/variables.tf b/modules/app_eks/lb_controller/variables.tf index be3e27a4..49fe5944 100644 --- a/modules/app_eks/lb_controller/variables.tf +++ b/modules/app_eks/lb_controller/variables.tf @@ -1,5 +1,5 @@ variable "namespace" { - type = string + type = string } variable "oidc_provider" { diff --git a/modules/app_eks/main.tf b/modules/app_eks/main.tf index d58211b4..063ff729 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -15,6 +15,16 @@ resource "aws_eks_addon" "eks" { ] } +resource "aws_eks_addon" "efs" { + cluster_name = module.eks.cluster_id + addon_name = "aws-efs-csi-driver" + addon_version = "v1.7.1-eksbuild.1" # Ensure this version is compatible + resolve_conflicts = "OVERWRITE" + depends_on = [ + module.eks + ] +} + # removed due to conflict with # AWS Load Balancer Controller # being installed with Helm. @@ -25,14 +35,6 @@ resource "aws_eks_addon" "eks" { # depends_on = [module.eks] #} -locals { - managed_policy_arns = concat([ - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", - "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", - ], var.eks_policy_arns) -} - module "eks" { source = "terraform-aws-modules/eks/aws" version = "~> 17.23" diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 0bf3c8b6..304b51db 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -1,12 +1,19 @@ +output "autoscaling_group_names" { + value = { for name, value in module.eks.node_groups : name => lookup(lookup(lookup(value, "resources")[0], "autoscaling_groups")[0], "name") } +} output "cluster_id" { value = module.eks.cluster_id description = "ID of the created EKS cluster" } -output "autoscaling_group_names" { - value = { for name, value in module.eks.node_groups : name => lookup(lookup(lookup(value, "resources")[0], "autoscaling_groups")[0], "name") } +output "efs_id" { + value = aws_efs_file_system.storage_class.id } output "node_role" { value = aws_iam_role.node -} \ No newline at end of file +} + +output "primary_workers_security_group_id" { + value = aws_security_group.primary_workers.id +} diff --git a/modules/file_storage/outputs.tf b/modules/file_storage/outputs.tf index 3e6815e5..9beae402 100644 --- a/modules/file_storage/outputs.tf +++ b/modules/file_storage/outputs.tf @@ -20,4 +20,4 @@ output "bucket_queue_name" { output "bucket_queue_arn" { value = var.create_queue ? aws_sqs_queue.file_storage.0.arn : null -} \ No newline at end of file +} diff --git a/modules/networking/main.tf b/modules/networking/main.tf index 328f234b..21382052 100644 --- a/modules/networking/main.tf +++ b/modules/networking/main.tf @@ -30,10 +30,10 @@ module "vpc" { single_nat_gateway = false private_subnet_tags = { - "kubernetes.io/role/internal-elb" = "1" + "kubernetes.io/role/internal-elb" = "1" } public_subnet_tags = { - "kubernetes.io/role/elb" = "1" + "kubernetes.io/role/elb" = "1" } } diff --git a/outputs.tf b/outputs.tf index 5d9a20ef..43e88048 100644 --- a/outputs.tf +++ b/outputs.tf @@ -55,4 +55,3 @@ output "url" { value = local.url description = "The URL to the W&B application" } - diff --git a/variables.tf b/variables.tf index 8d2278d6..12404d43 100644 --- a/variables.tf +++ b/variables.tf @@ -327,8 +327,13 @@ variable "elasticache_node_type" { # ########################################## # # Weights & Biases # # ########################################## -# variable "license" { -# type = string -# description = "Weights & Biases license key." -# } +variable "license" { + type = string + description = "Weights & Biases license key." +} +variable "other_wandb_env" { + type = map(any) + description = "Extra environment variables for W&B" + default = {} +} \ No newline at end of file