From f8f0a15db0c41cc2e2dc820bbd7e484ed40c5fe9 Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Wed, 15 Nov 2023 13:49:39 -0500 Subject: [PATCH 01/31] operator module --- main.tf | 118 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/main.tf b/main.tf index 4cb49865..a107d411 100644 --- a/main.tf +++ b/main.tf @@ -183,62 +183,62 @@ module "redis" { kms_key_arn = local.kms_key_arn } -# Comming soon! -# module "wandb" { -# source = "wandb/wandb/helm" -# version = "1.2.0" - -# depends_on = [ -# module.database, -# module.app_eks, -# module.redis, -# ] - -# operator_chart_version = "1.1.0" -# controller_image_tag = "1.10.1" - -# spec = { -# values = { -# global = { -# host = local.url -# license = var.license - -# bucket = { -# provider = "s3" -# name = local.bucket_name -# region = data.aws_s3_bucket.file_storage.region -# kmsKey = local.kms_key_arn -# } - -# mysql = { -# host = module.database.endpoint -# password = module.database.password -# username = module.database.username -# database = module.database.database_name -# port = module.database.port -# } - -# redis = { -# host = module.redis.0.host -# port = "${module.redis.0.port}?tls=true" -# } -# } - -# ingress = { -# class = "alb" - -# annotations = { -# "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" -# "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" -# "alb.ingress.kubernetes.io/scheme" = "internet-facing" -# "alb.ingress.kubernetes.io/target-type" = "ip" -# "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" -# "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn -# } -# } - -# mysql = { install = false } -# redis = { install = false } -# } -# } -# } +Comming soon! +module "wandb" { + source = "wandb/wandb/helm" + version = "1.2.0" + + depends_on = [ + module.database, + module.app_eks, + module.redis, + ] + + operator_chart_version = "1.1.0" + controller_image_tag = "1.10.1" + + spec = { + values = { + global = { + host = local.url + license = var.license + + bucket = { + provider = "s3" + name = local.bucket_name + region = data.aws_s3_bucket.file_storage.region + kmsKey = local.kms_key_arn + } + + mysql = { + host = module.database.endpoint + password = module.database.password + username = module.database.username + database = module.database.database_name + port = module.database.port + } + + redis = { + host = module.redis.0.host + port = "${module.redis.0.port}?tls=true" + } + } + + ingress = { + class = "alb" + + annotations = { + "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" + "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" + "alb.ingress.kubernetes.io/scheme" = "internet-facing" + "alb.ingress.kubernetes.io/target-type" = "ip" + "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" + "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn + } + } + + mysql = { install = false } + redis = { install = false } + } + } +} From 0c99c7cffa615b7eb3b06b82789b33d0557a6027 Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Wed, 15 Nov 2023 13:56:10 -0500 Subject: [PATCH 02/31] pass license --- variables.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variables.tf b/variables.tf index 8d2278d6..5622cf5d 100644 --- a/variables.tf +++ b/variables.tf @@ -327,8 +327,8 @@ variable "elasticache_node_type" { # ########################################## # # Weights & Biases # # ########################################## -# variable "license" { -# type = string -# description = "Weights & Biases license key." -# } +variable "license" { + type = string + description = "Weights & Biases license key." +} From a266648a51941157dbe1daffed73f74aadeab0dc Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Wed, 15 Nov 2023 14:51:44 -0500 Subject: [PATCH 03/31] Update main.tf --- main.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.tf b/main.tf index a107d411..bf734fca 100644 --- a/main.tf +++ b/main.tf @@ -183,7 +183,6 @@ module "redis" { kms_key_arn = local.kms_key_arn } -Comming soon! module "wandb" { source = "wandb/wandb/helm" version = "1.2.0" From 3cb558b0f481ebef424fedb07861d5744ee34c83 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 1 Dec 2023 13:21:49 -0600 Subject: [PATCH 04/31] fix: Allow for inbound-cidrs for the ALB --- main.tf | 4 +++- modules/app_eks/external_dns/external_dns.tf | 4 ++-- modules/app_eks/lb_controller/controller.tf | 2 +- modules/app_eks/outputs.tf | 2 +- outputs.tf | 1 - 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/main.tf b/main.tf index bf734fca..1d78ee6c 100644 --- a/main.tf +++ b/main.tf @@ -228,7 +228,9 @@ module "wandb" { annotations = { "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" - "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" + "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF + '${join("\\,", var.allowed_inbound_cidr)}' + EOF "alb.ingress.kubernetes.io/scheme" = "internet-facing" "alb.ingress.kubernetes.io/target-type" = "ip" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index 15f826ae..f71aefd3 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -21,7 +21,7 @@ resource "helm_release" "external_dns" { } set { - name = "domainFilters[0]" + name = "domainFilters[0]" value = var.fqdn } @@ -29,4 +29,4 @@ resource "helm_release" "external_dns" { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn } -} \ No newline at end of file +} diff --git a/modules/app_eks/lb_controller/controller.tf b/modules/app_eks/lb_controller/controller.tf index e6a572f0..eff2042c 100644 --- a/modules/app_eks/lb_controller/controller.tf +++ b/modules/app_eks/lb_controller/controller.tf @@ -3,7 +3,7 @@ resource "helm_release" "aws_load_balancer_controller" { repository = "https://aws.github.io/eks-charts" chart = "aws-load-balancer-controller" namespace = "kube-system" - version = "1.6.1" + version = "1.6.2" set { name = "clusterName" diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 0bf3c8b6..8f11adaa 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -9,4 +9,4 @@ output "autoscaling_group_names" { output "node_role" { value = aws_iam_role.node -} \ No newline at end of file +} diff --git a/outputs.tf b/outputs.tf index 5d9a20ef..43e88048 100644 --- a/outputs.tf +++ b/outputs.tf @@ -55,4 +55,3 @@ output "url" { value = local.url description = "The URL to the W&B application" } - From e8c92966df0f18cb9c33809e6d956c4dda6b959e Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 1 Dec 2023 13:45:28 -0600 Subject: [PATCH 05/31] Disabling otel per DD potential Conflict --- main.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.tf b/main.tf index 1d78ee6c..49d64b13 100644 --- a/main.tf +++ b/main.tf @@ -223,6 +223,10 @@ module "wandb" { } } + otel = { + install = false + } + ingress = { class = "alb" From 81d461619de8c2c908440a7482ffbe4514751555 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 1 Dec 2023 20:20:23 -0600 Subject: [PATCH 06/31] Revert "fix: Allow for inbound-cidrs for the ALB" This reverts commit 3cb558b0f481ebef424fedb07861d5744ee34c83. --- main.tf | 4 +--- modules/app_eks/external_dns/external_dns.tf | 4 ++-- modules/app_eks/lb_controller/controller.tf | 2 +- modules/app_eks/outputs.tf | 2 +- outputs.tf | 1 + 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/main.tf b/main.tf index 49d64b13..294e8697 100644 --- a/main.tf +++ b/main.tf @@ -232,9 +232,7 @@ module "wandb" { annotations = { "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" - "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF - '${join("\\,", var.allowed_inbound_cidr)}' - EOF + "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" "alb.ingress.kubernetes.io/scheme" = "internet-facing" "alb.ingress.kubernetes.io/target-type" = "ip" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index f71aefd3..15f826ae 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -21,7 +21,7 @@ resource "helm_release" "external_dns" { } set { - name = "domainFilters[0]" + name = "domainFilters[0]" value = var.fqdn } @@ -29,4 +29,4 @@ resource "helm_release" "external_dns" { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn } -} +} \ No newline at end of file diff --git a/modules/app_eks/lb_controller/controller.tf b/modules/app_eks/lb_controller/controller.tf index eff2042c..e6a572f0 100644 --- a/modules/app_eks/lb_controller/controller.tf +++ b/modules/app_eks/lb_controller/controller.tf @@ -3,7 +3,7 @@ resource "helm_release" "aws_load_balancer_controller" { repository = "https://aws.github.io/eks-charts" chart = "aws-load-balancer-controller" namespace = "kube-system" - version = "1.6.2" + version = "1.6.1" set { name = "clusterName" diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 8f11adaa..0bf3c8b6 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -9,4 +9,4 @@ output "autoscaling_group_names" { output "node_role" { value = aws_iam_role.node -} +} \ No newline at end of file diff --git a/outputs.tf b/outputs.tf index 43e88048..5d9a20ef 100644 --- a/outputs.tf +++ b/outputs.tf @@ -55,3 +55,4 @@ output "url" { value = local.url description = "The URL to the W&B application" } + From 416849a9032528f7f63b69f7b0345dacd5028243 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 1 Dec 2023 20:22:08 -0600 Subject: [PATCH 07/31] fixing a bug, can't quote the list of CIDRs --- main.tf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.tf b/main.tf index 294e8697..5e5cd14f 100644 --- a/main.tf +++ b/main.tf @@ -192,7 +192,6 @@ module "wandb" { module.app_eks, module.redis, ] - operator_chart_version = "1.1.0" controller_image_tag = "1.10.1" @@ -232,7 +231,9 @@ module "wandb" { annotations = { "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s" - "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0" + "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF + ${join("\\,", var.allowed_inbound_cidr)} + EOF "alb.ingress.kubernetes.io/scheme" = "internet-facing" "alb.ingress.kubernetes.io/target-type" = "ip" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" From 1636f8aea7e5d826e7c19b97edc1f95e635f5110 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 1 Dec 2023 20:25:06 -0600 Subject: [PATCH 08/31] fxing MR mistake --- modules/app_eks/external_dns/external_dns.tf | 4 ++-- modules/app_eks/lb_controller/controller.tf | 2 +- modules/app_eks/outputs.tf | 2 +- outputs.tf | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index 15f826ae..f71aefd3 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -21,7 +21,7 @@ resource "helm_release" "external_dns" { } set { - name = "domainFilters[0]" + name = "domainFilters[0]" value = var.fqdn } @@ -29,4 +29,4 @@ resource "helm_release" "external_dns" { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn } -} \ No newline at end of file +} diff --git a/modules/app_eks/lb_controller/controller.tf b/modules/app_eks/lb_controller/controller.tf index e6a572f0..eff2042c 100644 --- a/modules/app_eks/lb_controller/controller.tf +++ b/modules/app_eks/lb_controller/controller.tf @@ -3,7 +3,7 @@ resource "helm_release" "aws_load_balancer_controller" { repository = "https://aws.github.io/eks-charts" chart = "aws-load-balancer-controller" namespace = "kube-system" - version = "1.6.1" + version = "1.6.2" set { name = "clusterName" diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 0bf3c8b6..8f11adaa 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -9,4 +9,4 @@ output "autoscaling_group_names" { output "node_role" { value = aws_iam_role.node -} \ No newline at end of file +} diff --git a/outputs.tf b/outputs.tf index 5d9a20ef..43e88048 100644 --- a/outputs.tf +++ b/outputs.tf @@ -55,4 +55,3 @@ output "url" { value = local.url description = "The URL to the W&B application" } - From 1e58bc8e897f6d9d2fd76c587f30be6e7b353778 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> Date: Wed, 13 Dec 2023 18:09:55 -0600 Subject: [PATCH 09/31] Update main.tf --- main.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/main.tf b/main.tf index 5e5cd14f..eef80e90 100644 --- a/main.tf +++ b/main.tf @@ -222,10 +222,6 @@ module "wandb" { } } - otel = { - install = false - } - ingress = { class = "alb" From f4434f8e3087ed352b757ba076543c92c168cb58 Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Wed, 13 Dec 2023 22:52:38 -0500 Subject: [PATCH 10/31] Add extra envs --- main.tf | 2 ++ variables.tf | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/main.tf b/main.tf index eef80e90..04073c2f 100644 --- a/main.tf +++ b/main.tf @@ -201,6 +201,8 @@ module "wandb" { host = local.url license = var.license + extraEnv = var.other_wandb_env + bucket = { provider = "s3" name = local.bucket_name diff --git a/variables.tf b/variables.tf index 5622cf5d..12404d43 100644 --- a/variables.tf +++ b/variables.tf @@ -332,3 +332,8 @@ variable "license" { description = "Weights & Biases license key." } +variable "other_wandb_env" { + type = map(any) + description = "Extra environment variables for W&B" + default = {} +} \ No newline at end of file From 163a9e1cd78a9c6cebd0918ee692ab2d53b1fdd2 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 10:17:47 -0600 Subject: [PATCH 11/31] testing tf change --- main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.tf b/main.tf index 04073c2f..67ba9151 100644 --- a/main.tf +++ b/main.tf @@ -234,6 +234,7 @@ module "wandb" { EOF "alb.ingress.kubernetes.io/scheme" = "internet-facing" "alb.ingress.kubernetes.io/target-type" = "ip" + "test" = "test" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn } From ce1b6eb4552877a1ff774dc41b6f70c3202c51a2 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 10:25:55 -0600 Subject: [PATCH 12/31] removing test tf change --- main.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.tf b/main.tf index 67ba9151..04073c2f 100644 --- a/main.tf +++ b/main.tf @@ -234,7 +234,6 @@ module "wandb" { EOF "alb.ingress.kubernetes.io/scheme" = "internet-facing" "alb.ingress.kubernetes.io/target-type" = "ip" - "test" = "test" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn } From e5426dc8585ae4862a916ea75c4c5e16d5a9f8d7 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 11:58:02 -0600 Subject: [PATCH 13/31] adding efs-csi-driver for weave --- modules/app_eks/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/app_eks/main.tf b/modules/app_eks/main.tf index d58211b4..cc0133b5 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -15,6 +15,14 @@ resource "aws_eks_addon" "eks" { ] } +resource "aws_eks_addon" "efs" { + cluster_name = var.namespace + addon_name = "aws-efs-csi-driver" + depends_on = [ + module.eks + ] +} + # removed due to conflict with # AWS Load Balancer Controller # being installed with Helm. From b04d94c4638bf1ab8944e38e2519cde30d9be171 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 12:20:32 -0600 Subject: [PATCH 14/31] adding efs-csi-driver policy for weave --- modules/app_eks/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/app_eks/main.tf b/modules/app_eks/main.tf index cc0133b5..17a0733e 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -37,6 +37,7 @@ locals { managed_policy_arns = concat([ "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", + "arn:aws:iam::aws:policy/AmazonEFSCSIDriverPolicy", "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", ], var.eks_policy_arns) } From a9ad3d4b7589192ecfbc0d8a168ce3c0be3decd0 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 12:34:36 -0600 Subject: [PATCH 15/31] adding efs-csi-driver policy for weave and fmting --- modules/app_eks/external_dns/variables.tf | 2 +- modules/app_eks/iam-policy-docs.tf | 2 +- modules/app_eks/iam-role-attachments.tf | 5 +++++ modules/app_eks/lb_controller/variables.tf | 2 +- modules/app_eks/main.tf | 9 --------- modules/networking/main.tf | 4 ++-- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/modules/app_eks/external_dns/variables.tf b/modules/app_eks/external_dns/variables.tf index 4e33cb7f..0626c3d2 100644 --- a/modules/app_eks/external_dns/variables.tf +++ b/modules/app_eks/external_dns/variables.tf @@ -1,5 +1,5 @@ variable "namespace" { - type = string + type = string } variable "oidc_provider" { diff --git a/modules/app_eks/iam-policy-docs.tf b/modules/app_eks/iam-policy-docs.tf index 5399aef0..4e7f27b4 100644 --- a/modules/app_eks/iam-policy-docs.tf +++ b/modules/app_eks/iam-policy-docs.tf @@ -68,7 +68,7 @@ data "aws_iam_policy_document" "secrets_manager" { "secretsmanager:GetSecretValue", "secretsmanager:DeleteSecretVersion" ] - effect = "Allow" + effect = "Allow" resources = ["arn:aws:secretsmanager:*:${data.aws_caller_identity.current.account_id}:secret:${var.namespace}*"] } } diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index 938ad34b..f2449faa 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -28,6 +28,11 @@ resource "aws_iam_role_policy_attachment" "eks_cni" { policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" } +resource "aws_iam_role_policy_attachment" "eks_cni" { + role = aws_iam_role.node.name + policy_arn = "arn:aws:iam::aws:policy/AmazonEFSCSIDriverPolicy" +} + resource "aws_iam_role_policy_attachment" "eks_worker_node" { role = aws_iam_role.node.name policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" diff --git a/modules/app_eks/lb_controller/variables.tf b/modules/app_eks/lb_controller/variables.tf index be3e27a4..49fe5944 100644 --- a/modules/app_eks/lb_controller/variables.tf +++ b/modules/app_eks/lb_controller/variables.tf @@ -1,5 +1,5 @@ variable "namespace" { - type = string + type = string } variable "oidc_provider" { diff --git a/modules/app_eks/main.tf b/modules/app_eks/main.tf index 17a0733e..3e7470e2 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -33,15 +33,6 @@ resource "aws_eks_addon" "efs" { # depends_on = [module.eks] #} -locals { - managed_policy_arns = concat([ - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", - "arn:aws:iam::aws:policy/AmazonEFSCSIDriverPolicy", - "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", - ], var.eks_policy_arns) -} - module "eks" { source = "terraform-aws-modules/eks/aws" version = "~> 17.23" diff --git a/modules/networking/main.tf b/modules/networking/main.tf index 328f234b..21382052 100644 --- a/modules/networking/main.tf +++ b/modules/networking/main.tf @@ -30,10 +30,10 @@ module "vpc" { single_nat_gateway = false private_subnet_tags = { - "kubernetes.io/role/internal-elb" = "1" + "kubernetes.io/role/internal-elb" = "1" } public_subnet_tags = { - "kubernetes.io/role/elb" = "1" + "kubernetes.io/role/elb" = "1" } } From 2847ad87b4dec06c624df02f5a843281e732eff1 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 12:36:15 -0600 Subject: [PATCH 16/31] namespacing fix --- modules/app_eks/iam-role-attachments.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index f2449faa..5196d4a8 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -28,7 +28,7 @@ resource "aws_iam_role_policy_attachment" "eks_cni" { policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" } -resource "aws_iam_role_policy_attachment" "eks_cni" { +resource "aws_iam_role_policy_attachment" "eks_efs" { role = aws_iam_role.node.name policy_arn = "arn:aws:iam::aws:policy/AmazonEFSCSIDriverPolicy" } From 8a538da341d0e8426147d266e7c86a59cb96b6a1 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 12:42:38 -0600 Subject: [PATCH 17/31] fixing arn --- modules/app_eks/iam-role-attachments.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index 5196d4a8..4d023a62 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -30,7 +30,7 @@ resource "aws_iam_role_policy_attachment" "eks_cni" { resource "aws_iam_role_policy_attachment" "eks_efs" { role = aws_iam_role.node.name - policy_arn = "arn:aws:iam::aws:policy/AmazonEFSCSIDriverPolicy" + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy" } resource "aws_iam_role_policy_attachment" "eks_worker_node" { From ca761dd719ce5bf0f61b55f33c0a110edbc0a492 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Thu, 14 Dec 2023 12:49:01 -0600 Subject: [PATCH 18/31] updating policy --- modules/app_eks/iam-role-attachments.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index 4d023a62..e82fe63b 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -30,7 +30,7 @@ resource "aws_iam_role_policy_attachment" "eks_cni" { resource "aws_iam_role_policy_attachment" "eks_efs" { role = aws_iam_role.node.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy" + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy" } resource "aws_iam_role_policy_attachment" "eks_worker_node" { From 9acf6c3646e44991bc8ed2aeb2ff6c0cdd676a5a Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 15 Dec 2023 16:49:48 -0600 Subject: [PATCH 19/31] updates for weave EFS storage class --- main.tf | 23 ++++++++++++++++ modules/app_eks/main.tf | 6 ++-- modules/app_eks/outputs.tf | 4 +++ modules/efs/main.tf | 49 +++++++++++++++++++++++++++++++++ modules/efs/outputs.tf | 3 ++ modules/efs/variables.tf | 19 +++++++++++++ modules/file_storage/outputs.tf | 2 +- 7 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 modules/efs/main.tf create mode 100644 modules/efs/outputs.tf create mode 100644 modules/efs/variables.tf diff --git a/main.tf b/main.tf index 04073c2f..820eed8d 100644 --- a/main.tf +++ b/main.tf @@ -26,6 +26,14 @@ module "file_storage" { deletion_protection = var.deletion_protection } +module "efs" { + source = "./modules/efs" + namespace = var.namespace + private_subnets = module.networking.private_subnets + primary_workers_security_group_id = module.app_eks.primary_workers_security_group_id + vpc_id = module.networking.vpc_id +} + locals { bucket_name = local.use_external_bucket ? var.bucket_name : module.file_storage.0.bucket_name bucket_queue_name = local.use_internal_queue ? null : module.file_storage.0.bucket_queue_name @@ -241,6 +249,21 @@ module "wandb" { mysql = { install = false } redis = { install = false } + + weave = { + persistence = { + provider = "efs" + efs = { + fileSystemId = module.efs.efs_id + } + + } + } } } } + + +output "efs_ip" { + value = module.efs.efs_ip +} diff --git a/modules/app_eks/main.tf b/modules/app_eks/main.tf index 3e7470e2..063ff729 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -16,8 +16,10 @@ resource "aws_eks_addon" "eks" { } resource "aws_eks_addon" "efs" { - cluster_name = var.namespace - addon_name = "aws-efs-csi-driver" + cluster_name = module.eks.cluster_id + addon_name = "aws-efs-csi-driver" + addon_version = "v1.7.1-eksbuild.1" # Ensure this version is compatible + resolve_conflicts = "OVERWRITE" depends_on = [ module.eks ] diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 8f11adaa..953e2979 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -10,3 +10,7 @@ output "autoscaling_group_names" { output "node_role" { value = aws_iam_role.node } + +output "primary_workers_security_group_id" { + value = aws_security_group.primary_workers.id +} diff --git a/modules/efs/main.tf b/modules/efs/main.tf new file mode 100644 index 00000000..101e5187 --- /dev/null +++ b/modules/efs/main.tf @@ -0,0 +1,49 @@ +resource "random_pet" "efs" { + length = 2 +} + +resource "aws_efs_file_system" "storage_class" { + creation_token = "${var.namespace}-${random_pet.efs.id}" + encrypted = true + performance_mode = "generalPurpose" + throughput_mode = "elastic" + + + tags = { + Name = "${var.namespace}-efs-${random_pet.efs.id}" + } +} + +resource "aws_efs_backup_policy" "storage_class" { + file_system_id = aws_efs_file_system.storage_class.id + + backup_policy { + status = "DISABLED" + } +} + +resource "aws_security_group" "storage_class_nfs" { + name = "nfs-security-group" + description = "Security group for NFS traffic" + vpc_id = var.vpc_id + + ingress { + description = "NFS inbound" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + security_groups = [var.primary_workers_security_group_id] + } + + tags = { + Name = "nfs-security-group" + } +} + + +resource "aws_efs_mount_target" "storage_class" { + for_each = { for subnet in var.private_subnets : subnet => subnet } + file_system_id = aws_efs_file_system.storage_class.id + subnet_id = each.value + security_groups = [aws_security_group.storage_class_nfs.id] +} diff --git a/modules/efs/outputs.tf b/modules/efs/outputs.tf new file mode 100644 index 00000000..be96383e --- /dev/null +++ b/modules/efs/outputs.tf @@ -0,0 +1,3 @@ +output "efs_id" { + value = aws_efs_file_system.storage_class.id +} diff --git a/modules/efs/variables.tf b/modules/efs/variables.tf new file mode 100644 index 00000000..6ea68f96 --- /dev/null +++ b/modules/efs/variables.tf @@ -0,0 +1,19 @@ +variable "namespace" { + description = "The namespace to use for the efs resource" + type = string +} + +variable "private_subnets" { + description = "A list of the subnets in which the aws_efs_mount_target will be deployed." + type = list(string) +} + +variable "primary_workers_security_group_id" { + description = "The security group ID of the primary workers." + type = string +} + +variable "vpc_id" { + description = "The ID of the VPC in which the storage_class_nfs security group will be deployed." + type = string +} diff --git a/modules/file_storage/outputs.tf b/modules/file_storage/outputs.tf index 3e6815e5..9beae402 100644 --- a/modules/file_storage/outputs.tf +++ b/modules/file_storage/outputs.tf @@ -20,4 +20,4 @@ output "bucket_queue_name" { output "bucket_queue_arn" { value = var.create_queue ? aws_sqs_queue.file_storage.0.arn : null -} \ No newline at end of file +} From 9fc18fad14fe57ef50ed09bc61af392523e123f5 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 15 Dec 2023 17:05:39 -0600 Subject: [PATCH 20/31] remove debug block --- main.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/main.tf b/main.tf index 820eed8d..19d6fe18 100644 --- a/main.tf +++ b/main.tf @@ -262,8 +262,3 @@ module "wandb" { } } } - - -output "efs_ip" { - value = module.efs.efs_ip -} From 3e5a1f7ad23b97546284c62631f241ab2350ec03 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Fri, 15 Dec 2023 17:10:39 -0600 Subject: [PATCH 21/31] fix a provider error --- modules/efs/main.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/efs/main.tf b/modules/efs/main.tf index 101e5187..6ae02a95 100644 --- a/modules/efs/main.tf +++ b/modules/efs/main.tf @@ -34,10 +34,6 @@ resource "aws_security_group" "storage_class_nfs" { protocol = "tcp" security_groups = [var.primary_workers_security_group_id] } - - tags = { - Name = "nfs-security-group" - } } From 64c36767ab6f044ca9fd0058ae782b30bfc27b37 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 12:22:33 -0600 Subject: [PATCH 22/31] removing name --- modules/efs/main.tf | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/efs/main.tf b/modules/efs/main.tf index 6ae02a95..0b9671c5 100644 --- a/modules/efs/main.tf +++ b/modules/efs/main.tf @@ -7,11 +7,6 @@ resource "aws_efs_file_system" "storage_class" { encrypted = true performance_mode = "generalPurpose" throughput_mode = "elastic" - - - tags = { - Name = "${var.namespace}-efs-${random_pet.efs.id}" - } } resource "aws_efs_backup_policy" "storage_class" { @@ -34,6 +29,7 @@ resource "aws_security_group" "storage_class_nfs" { protocol = "tcp" security_groups = [var.primary_workers_security_group_id] } + } From b7f1f27ba912d512a1f866e5ee5991eb2b8389bf Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 12:59:31 -0600 Subject: [PATCH 23/31] updating sg name --- modules/efs/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/efs/main.tf b/modules/efs/main.tf index 0b9671c5..42350518 100644 --- a/modules/efs/main.tf +++ b/modules/efs/main.tf @@ -18,7 +18,7 @@ resource "aws_efs_backup_policy" "storage_class" { } resource "aws_security_group" "storage_class_nfs" { - name = "nfs-security-group" + name = "${var.namespace}-${random_pet.efs.id}" description = "Security group for NFS traffic" vpc_id = var.vpc_id From d907526ecdd3237335181f133942713d9d7e91c6 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 13:55:29 -0600 Subject: [PATCH 24/31] refactor to app-eks --- main.tf | 10 +--------- modules/{efs/main.tf => app_eks/efs.tf} | 6 +++--- modules/app_eks/outputs.tf | 7 +++++-- modules/efs/outputs.tf | 3 --- modules/efs/variables.tf | 19 ------------------- 5 files changed, 9 insertions(+), 36 deletions(-) rename modules/{efs/main.tf => app_eks/efs.tf} (83%) delete mode 100644 modules/efs/outputs.tf delete mode 100644 modules/efs/variables.tf diff --git a/main.tf b/main.tf index 19d6fe18..b7f43510 100644 --- a/main.tf +++ b/main.tf @@ -26,14 +26,6 @@ module "file_storage" { deletion_protection = var.deletion_protection } -module "efs" { - source = "./modules/efs" - namespace = var.namespace - private_subnets = module.networking.private_subnets - primary_workers_security_group_id = module.app_eks.primary_workers_security_group_id - vpc_id = module.networking.vpc_id -} - locals { bucket_name = local.use_external_bucket ? var.bucket_name : module.file_storage.0.bucket_name bucket_queue_name = local.use_internal_queue ? null : module.file_storage.0.bucket_queue_name @@ -254,7 +246,7 @@ module "wandb" { persistence = { provider = "efs" efs = { - fileSystemId = module.efs.efs_id + fileSystemId = module.app_eks.efs_id } } diff --git a/modules/efs/main.tf b/modules/app_eks/efs.tf similarity index 83% rename from modules/efs/main.tf rename to modules/app_eks/efs.tf index 42350518..626396ab 100644 --- a/modules/efs/main.tf +++ b/modules/app_eks/efs.tf @@ -20,21 +20,21 @@ resource "aws_efs_backup_policy" "storage_class" { resource "aws_security_group" "storage_class_nfs" { name = "${var.namespace}-${random_pet.efs.id}" description = "Security group for NFS traffic" - vpc_id = var.vpc_id + vpc_id = var.network_id ingress { description = "NFS inbound" from_port = 2049 to_port = 2049 protocol = "tcp" - security_groups = [var.primary_workers_security_group_id] + security_groups = [aws_security_group.primary_workers.id] } } resource "aws_efs_mount_target" "storage_class" { - for_each = { for subnet in var.private_subnets : subnet => subnet } + for_each = { for subnet in var.network_private_subnets : subnet => subnet } file_system_id = aws_efs_file_system.storage_class.id subnet_id = each.value security_groups = [aws_security_group.storage_class_nfs.id] diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 953e2979..304b51db 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -1,10 +1,13 @@ +output "autoscaling_group_names" { + value = { for name, value in module.eks.node_groups : name => lookup(lookup(lookup(value, "resources")[0], "autoscaling_groups")[0], "name") } +} output "cluster_id" { value = module.eks.cluster_id description = "ID of the created EKS cluster" } -output "autoscaling_group_names" { - value = { for name, value in module.eks.node_groups : name => lookup(lookup(lookup(value, "resources")[0], "autoscaling_groups")[0], "name") } +output "efs_id" { + value = aws_efs_file_system.storage_class.id } output "node_role" { diff --git a/modules/efs/outputs.tf b/modules/efs/outputs.tf deleted file mode 100644 index be96383e..00000000 --- a/modules/efs/outputs.tf +++ /dev/null @@ -1,3 +0,0 @@ -output "efs_id" { - value = aws_efs_file_system.storage_class.id -} diff --git a/modules/efs/variables.tf b/modules/efs/variables.tf deleted file mode 100644 index 6ea68f96..00000000 --- a/modules/efs/variables.tf +++ /dev/null @@ -1,19 +0,0 @@ -variable "namespace" { - description = "The namespace to use for the efs resource" - type = string -} - -variable "private_subnets" { - description = "A list of the subnets in which the aws_efs_mount_target will be deployed." - type = list(string) -} - -variable "primary_workers_security_group_id" { - description = "The security group ID of the primary workers." - type = string -} - -variable "vpc_id" { - description = "The ID of the VPC in which the storage_class_nfs security group will be deployed." - type = string -} From def2a11e3e3dcc663e3f2ee7eceed9961df7d224 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 14:13:14 -0600 Subject: [PATCH 25/31] adding aws_security_group_rule --- modules/app_eks/efs.tf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf index 626396ab..1cfcbe47 100644 --- a/modules/app_eks/efs.tf +++ b/modules/app_eks/efs.tf @@ -21,17 +21,17 @@ resource "aws_security_group" "storage_class_nfs" { name = "${var.namespace}-${random_pet.efs.id}" description = "Security group for NFS traffic" vpc_id = var.network_id - - ingress { - description = "NFS inbound" - from_port = 2049 - to_port = 2049 - protocol = "tcp" - security_groups = [aws_security_group.primary_workers.id] - } - } +resource "aws_security_group_rule" "nfs_ingress" { + description = "NFS inbound" + type = "ingress" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + security_group_id = aws_security_group.storage_class_nfs.id + source_security_group_id = aws_security_group.primary_workers.id +} resource "aws_efs_mount_target" "storage_class" { for_each = { for subnet in var.network_private_subnets : subnet => subnet } From d8a73e648549796dc46c52eece9bf0ba2e0a7463 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 14:19:27 -0600 Subject: [PATCH 26/31] adding aws_security_group_rule --- modules/app_eks/efs.tf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf index 1cfcbe47..467f81e2 100644 --- a/modules/app_eks/efs.tf +++ b/modules/app_eks/efs.tf @@ -23,6 +23,15 @@ resource "aws_security_group" "storage_class_nfs" { vpc_id = var.network_id } +resource "aws_vpc_security_group_ingress_rule" "example" { + security_group_id = aws_security_group.example.id + + cidr_ipv4 = "10.0.0.0/8" + from_port = 80 + ip_protocol = "tcp" + to_port = 80 +} + resource "aws_security_group_rule" "nfs_ingress" { description = "NFS inbound" type = "ingress" From 3a07c7bc10ef9a97e8fd4d639ac033907c53e352 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 14:31:05 -0600 Subject: [PATCH 27/31] adding aws_security_group_rule --- modules/app_eks/efs.tf | 9 --------- 1 file changed, 9 deletions(-) diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf index 467f81e2..1cfcbe47 100644 --- a/modules/app_eks/efs.tf +++ b/modules/app_eks/efs.tf @@ -23,15 +23,6 @@ resource "aws_security_group" "storage_class_nfs" { vpc_id = var.network_id } -resource "aws_vpc_security_group_ingress_rule" "example" { - security_group_id = aws_security_group.example.id - - cidr_ipv4 = "10.0.0.0/8" - from_port = 80 - ip_protocol = "tcp" - to_port = 80 -} - resource "aws_security_group_rule" "nfs_ingress" { description = "NFS inbound" type = "ingress" From 52173c97a3145b22e9bd3ebcedb7a3a7b5591d80 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 15:04:26 -0600 Subject: [PATCH 28/31] weird spacing issue --- modules/app_eks/efs.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf index 1cfcbe47..bb07de78 100644 --- a/modules/app_eks/efs.tf +++ b/modules/app_eks/efs.tf @@ -17,6 +17,7 @@ resource "aws_efs_backup_policy" "storage_class" { } } + resource "aws_security_group" "storage_class_nfs" { name = "${var.namespace}-${random_pet.efs.id}" description = "Security group for NFS traffic" From 4f9536cdc14484013cbe52b30c25721e3c0e3e7a Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Mon, 18 Dec 2023 15:04:49 -0600 Subject: [PATCH 29/31] weird spacing issue --- modules/app_eks/efs.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/app_eks/efs.tf b/modules/app_eks/efs.tf index bb07de78..1cfcbe47 100644 --- a/modules/app_eks/efs.tf +++ b/modules/app_eks/efs.tf @@ -17,7 +17,6 @@ resource "aws_efs_backup_policy" "storage_class" { } } - resource "aws_security_group" "storage_class_nfs" { name = "${var.namespace}-${random_pet.efs.id}" description = "Security group for NFS traffic" From 52934c3a69ecc65e8dc57f5aad7d5c052358c8f7 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 19 Dec 2023 20:41:29 -0600 Subject: [PATCH 30/31] Fix bucket kms key arn for external buckets --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index b7f43510..6cc4ae10 100644 --- a/main.tf +++ b/main.tf @@ -207,7 +207,7 @@ module "wandb" { provider = "s3" name = local.bucket_name region = data.aws_s3_bucket.file_storage.region - kmsKey = local.kms_key_arn + kmsKey = local.use_external_bucket ? var.bucket_kms_key_arn : local.kms_key_arn } mysql = { From 1496448efefe57c0f9a98ac93f105969061d4823 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Wed, 20 Dec 2023 15:59:51 -0600 Subject: [PATCH 31/31] fixing the username->user typo --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 6cc4ae10..6729136a 100644 --- a/main.tf +++ b/main.tf @@ -213,7 +213,7 @@ module "wandb" { mysql = { host = module.database.endpoint password = module.database.password - username = module.database.username + user = module.database.username database = module.database.database_name port = module.database.port }