Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Init operator #154

Merged
merged 32 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f8f0a15
operator module
jsbroks Nov 15, 2023
0c99c7c
pass license
jsbroks Nov 15, 2023
a266648
Update main.tf
jsbroks Nov 15, 2023
3cb558b
fix: Allow for inbound-cidrs for the ALB
zacharyblasczyk Dec 1, 2023
e8c9296
Disabling otel per DD potential Conflict
zacharyblasczyk Dec 1, 2023
81d4616
Revert "fix: Allow for inbound-cidrs for the ALB"
zacharyblasczyk Dec 2, 2023
416849a
fixing a bug, can't quote the list of CIDRs
zacharyblasczyk Dec 2, 2023
1636f8a
fxing MR mistake
zacharyblasczyk Dec 2, 2023
601088f
Merge remote-tracking branch 'origin/main' into operator
zacharyblasczyk Dec 13, 2023
1e58bc8
Update main.tf
zacharyblasczyk Dec 14, 2023
f4434f8
Add extra envs
jsbroks Dec 14, 2023
163a9e1
testing tf change
zacharyblasczyk Dec 14, 2023
ce1b6eb
removing test tf change
zacharyblasczyk Dec 14, 2023
e5426dc
adding efs-csi-driver for weave
zacharyblasczyk Dec 14, 2023
b04d94c
adding efs-csi-driver policy for weave
zacharyblasczyk Dec 14, 2023
a9ad3d4
adding efs-csi-driver policy for weave and fmting
zacharyblasczyk Dec 14, 2023
2847ad8
namespacing fix
zacharyblasczyk Dec 14, 2023
8a538da
fixing arn
zacharyblasczyk Dec 14, 2023
ca761dd
updating policy
zacharyblasczyk Dec 14, 2023
9acf6c3
updates for weave EFS storage class
zacharyblasczyk Dec 15, 2023
9fc18fa
remove debug block
zacharyblasczyk Dec 15, 2023
3e5a1f7
fix a provider error
zacharyblasczyk Dec 15, 2023
64c3676
removing name
zacharyblasczyk Dec 18, 2023
b7f1f27
updating sg name
zacharyblasczyk Dec 18, 2023
d907526
refactor to app-eks
zacharyblasczyk Dec 18, 2023
def2a11
adding aws_security_group_rule
zacharyblasczyk Dec 18, 2023
d8a73e6
adding aws_security_group_rule
zacharyblasczyk Dec 18, 2023
3a07c7b
adding aws_security_group_rule
zacharyblasczyk Dec 18, 2023
52173c9
weird spacing issue
zacharyblasczyk Dec 18, 2023
4f9536c
weird spacing issue
zacharyblasczyk Dec 18, 2023
52934c3
Fix bucket kms key arn for external buckets
zacharyblasczyk Dec 20, 2023
1496448
fixing the username->user typo
zacharyblasczyk Dec 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 79 additions & 59 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ module "file_storage" {
deletion_protection = var.deletion_protection
}

# Provisions the EFS file system (see ./modules/efs) inside the VPC's private
# subnets and lets the primary EKS worker security group reach it.
module "efs" {
  source = "./modules/efs"

  namespace = var.namespace

  # Network placement of the mount targets.
  vpc_id          = module.networking.vpc_id
  private_subnets = module.networking.private_subnets

  # Security group that is allowed to talk to the file system.
  primary_workers_security_group_id = module.app_eks.primary_workers_security_group_id
}

locals {
bucket_name = local.use_external_bucket ? var.bucket_name : module.file_storage.0.bucket_name
bucket_queue_name = local.use_internal_queue ? null : module.file_storage.0.bucket_queue_name
Expand Down Expand Up @@ -183,62 +191,74 @@ module "redis" {
kms_key_arn = local.kms_key_arn
}

# Coming soon!
# module "wandb" {
# source = "wandb/wandb/helm"
# version = "1.2.0"

# depends_on = [
# module.database,
# module.app_eks,
# module.redis,
# ]

# operator_chart_version = "1.1.0"
# controller_image_tag = "1.10.1"

# spec = {
# values = {
# global = {
# host = local.url
# license = var.license

# bucket = {
# provider = "s3"
# name = local.bucket_name
# region = data.aws_s3_bucket.file_storage.region
# kmsKey = local.kms_key_arn
# }

# mysql = {
# host = module.database.endpoint
# password = module.database.password
# username = module.database.username
# database = module.database.database_name
# port = module.database.port
# }

# redis = {
# host = module.redis.0.host
# port = "${module.redis.0.port}?tls=true"
# }
# }

# ingress = {
# class = "alb"

# annotations = {
# "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s"
# "alb.ingress.kubernetes.io/inbound-cidrs" = "0.0.0.0/0"
# "alb.ingress.kubernetes.io/scheme" = "internet-facing"
# "alb.ingress.kubernetes.io/target-type" = "ip"
# "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]"
# "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn
# }
# }

# mysql = { install = false }
# redis = { install = false }
# }
# }
# }
# Deploys W&B through the wandb/wandb/helm module (operator chart and
# controller image pinned below) and wires it to the bucket, MySQL, Redis,
# ALB ingress and EFS resources defined elsewhere in this configuration.
module "wandb" {
  source  = "wandb/wandb/helm"
  version = "1.2.0"

  # The release must not be applied before its backing services exist.
  depends_on = [
    module.database,
    module.app_eks,
    module.redis,
  ]

  operator_chart_version = "1.1.0"
  controller_image_tag   = "1.10.1"

  spec = {
    values = {
      global = {
        host    = local.url
        license = var.license

        extraEnv = var.other_wandb_env

        bucket = {
          provider = "s3"
          name     = local.bucket_name
          region   = data.aws_s3_bucket.file_storage.region
          kmsKey   = local.kms_key_arn
        }

        mysql = {
          host     = module.database.endpoint
          password = module.database.password
          # The values key is `user` (not `username`); the value still comes
          # from the database module's `username` output.
          user     = module.database.username
          database = module.database.database_name
          port     = module.database.port
        }

        redis = {
          host = module.redis.0.host
          # TLS is requested by appending a query string to the port value.
          port = "${module.redis.0.port}?tls=true"
        }
      }

      ingress = {
        class = "alb"

        annotations = {
          "alb.ingress.kubernetes.io/load-balancer-name" = "${var.namespace}-alb-k8s"
          # The CIDR list is joined with an escaped comma and passed as a
          # heredoc because the list cannot be quoted directly.
          # NOTE(review): the heredoc leaves a trailing newline in the value;
          # confirm the consumer tolerates it.
          "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF
            ${join("\\,", var.allowed_inbound_cidr)}
          EOF
          "alb.ingress.kubernetes.io/scheme"          = "internet-facing"
          "alb.ingress.kubernetes.io/target-type"     = "ip"
          "alb.ingress.kubernetes.io/listen-ports"    = "[{\\\"HTTPS\\\": 443}]"
          "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn
        }
      }

      # MySQL and Redis are provided by the modules above, so the chart's
      # bundled instances are switched off.
      mysql = { install = false }
      redis = { install = false }

      weave = {
        persistence = {
          provider = "efs"
          efs = {
            fileSystemId = module.efs.efs_id
          }
        }
      }
    }
  }
}
4 changes: 2 additions & 2 deletions modules/app_eks/external_dns/external_dns.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ resource "helm_release" "external_dns" {
}

set {
name = "domainFilters[0]"
name = "domainFilters[0]"
value = var.fqdn
}

set {
name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
value = aws_iam_role.default.arn
}
}
}
2 changes: 1 addition & 1 deletion modules/app_eks/external_dns/variables.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
variable "namespace" {
type = string
type = string
}

variable "oidc_provider" {
Expand Down
2 changes: 1 addition & 1 deletion modules/app_eks/iam-policy-docs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ data "aws_iam_policy_document" "secrets_manager" {
"secretsmanager:GetSecretValue",
"secretsmanager:DeleteSecretVersion"
]
effect = "Allow"
effect = "Allow"
resources = ["arn:aws:secretsmanager:*:${data.aws_caller_identity.current.account_id}:secret:${var.namespace}*"]
}
}
5 changes: 5 additions & 0 deletions modules/app_eks/iam-role-attachments.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ resource "aws_iam_role_policy_attachment" "eks_cni" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
}

# Grants the EKS node role the AWS-managed EFS CSI driver policy.
resource "aws_iam_role_policy_attachment" "eks_efs" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy"
  role       = aws_iam_role.node.name
}

resource "aws_iam_role_policy_attachment" "eks_worker_node" {
role = aws_iam_role.node.name
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
Expand Down
2 changes: 1 addition & 1 deletion modules/app_eks/lb_controller/controller.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ resource "helm_release" "aws_load_balancer_controller" {
repository = "https://aws.github.io/eks-charts"
chart = "aws-load-balancer-controller"
namespace = "kube-system"
version = "1.6.1"
version = "1.6.2"

set {
name = "clusterName"
Expand Down
2 changes: 1 addition & 1 deletion modules/app_eks/lb_controller/variables.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
variable "namespace" {
type = string
type = string
}

variable "oidc_provider" {
Expand Down
18 changes: 10 additions & 8 deletions modules/app_eks/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ resource "aws_eks_addon" "eks" {
]
}

# Installs the EFS CSI driver as an EKS add-on on the cluster from module.eks.
resource "aws_eks_addon" "efs" {
  depends_on = [
    module.eks
  ]

  cluster_name = module.eks.cluster_id

  addon_name = "aws-efs-csi-driver"
  # Ensure this version is compatible
  addon_version = "v1.7.1-eksbuild.1"

  resolve_conflicts = "OVERWRITE"
}

# removed due to conflict with
# AWS Load Balancer Controller
# being installed with Helm.
Expand All @@ -25,14 +35,6 @@ resource "aws_eks_addon" "eks" {
# depends_on = [module.eks]
#}

locals {
managed_policy_arns = concat([
"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
], var.eks_policy_arns)
}

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 17.23"
Expand Down
6 changes: 5 additions & 1 deletion modules/app_eks/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@ output "autoscaling_group_names" {

output "node_role" {
value = aws_iam_role.node
}
}

# ID of the primary workers security group, exported so callers of this module
# can reference it (for example as a source for ingress rules).
output "primary_workers_security_group_id" {
value = aws_security_group.primary_workers.id
}
41 changes: 41 additions & 0 deletions modules/efs/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Random two-word suffix appended to the namespace to build the EFS creation
# token and the NFS security group name.
resource "random_pet" "efs" {
length = 2
}

# Encrypted, general-purpose EFS file system with elastic throughput.
resource "aws_efs_file_system" "storage_class" {
  # Token is "<namespace>-<random pet>".
  creation_token = "${var.namespace}-${random_pet.efs.id}"

  performance_mode = "generalPurpose"
  throughput_mode  = "elastic"

  encrypted = true
}

# Sets the backup policy of the storage-class file system to DISABLED.
# NOTE(review): presumably intentional for this data — confirm that no backups
# are wanted.
resource "aws_efs_backup_policy" "storage_class" {
file_system_id = aws_efs_file_system.storage_class.id

backup_policy {
status = "DISABLED"
}
}

# Security group attached to the EFS mount targets. It only admits NFS
# (TCP 2049) traffic originating from the primary workers security group.
resource "aws_security_group" "storage_class_nfs" {
  name        = "${var.namespace}-${random_pet.efs.id}"
  description = "Security group for NFS traffic"
  vpc_id      = var.vpc_id

  ingress {
    description     = "NFS inbound"
    from_port       = 2049
    to_port         = 2049
    protocol        = "tcp"
    security_groups = [var.primary_workers_security_group_id]
  }
}


# Creates one EFS mount target per private subnet, each protected by the NFS
# security group.
resource "aws_efs_mount_target" "storage_class" {
  # Key by list position rather than by subnet ID: for_each keys must be known
  # at plan time, and subnet IDs are unknown until apply when the subnets are
  # created in the same run. Trade-off: reordering var.private_subnets will
  # re-create the affected mount targets.
  for_each = { for index, subnet in var.private_subnets : tostring(index) => subnet }

  file_system_id  = aws_efs_file_system.storage_class.id
  subnet_id       = each.value
  security_groups = [aws_security_group.storage_class_nfs.id]
}
3 changes: 3 additions & 0 deletions modules/efs/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Exported for consumers that need to reference the file system.
output "efs_id" {
  description = "The ID of the EFS file system."
  value       = aws_efs_file_system.storage_class.id
}
19 changes: 19 additions & 0 deletions modules/efs/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Inputs of the EFS module. All four are required (no defaults).

# Prefix used in the creation token and the security group name.
variable "namespace" {
  type        = string
  description = "The namespace to use for the efs resource"
}

# One mount target is created per entry.
variable "private_subnets" {
  type        = list(string)
  description = "A list of the subnets in which the aws_efs_mount_target will be deployed."
}

# Source security group allowed to reach the file system over NFS.
variable "primary_workers_security_group_id" {
  type        = string
  description = "The security group ID of the primary workers."
}

# VPC that hosts the NFS security group.
variable "vpc_id" {
  type        = string
  description = "The ID of the VPC in which the storage_class_nfs security group will be deployed."
}
2 changes: 1 addition & 1 deletion modules/file_storage/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ output "bucket_queue_name" {

output "bucket_queue_arn" {
value = var.create_queue ? aws_sqs_queue.file_storage.0.arn : null
}
}
4 changes: 2 additions & 2 deletions modules/networking/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ module "vpc" {
single_nat_gateway = false

private_subnet_tags = {
"kubernetes.io/role/internal-elb" = "1"
"kubernetes.io/role/internal-elb" = "1"
}

public_subnet_tags = {
"kubernetes.io/role/elb" = "1"
"kubernetes.io/role/elb" = "1"
}
}
1 change: 0 additions & 1 deletion outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,3 @@ output "url" {
value = local.url
description = "The URL to the W&B application"
}

13 changes: 9 additions & 4 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,13 @@ variable "elasticache_node_type" {
# ##########################################
# # Weights & Biases #
# ##########################################
# variable "license" {
# type = string
# description = "Weights & Biases license key."
# }
# Required: forwarded to the W&B deployment as the global license value.
# NOTE(review): this is a credential-like value; consider whether it should be
# marked sensitive once downstream modules are confirmed to accept that.
variable "license" {
  description = "Weights & Biases license key."
  type        = string
}

# Optional: forwarded to the W&B deployment as extra environment variables;
# defaults to no extra variables.
variable "other_wandb_env" {
  description = "Extra environment variables for W&B"
  type        = map(any)
  default     = {}
}
Loading