diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl index 1cc91aa77d7..8dfea1886f4 100644 --- a/enos/enos-scenario-upgrade.hcl +++ b/enos/enos-scenario-upgrade.hcl @@ -89,18 +89,35 @@ scenario "upgrade" { module = module.run_workloads variables { - nomad_addr = step.provision_cluster.nomad_addr - ca_file = step.provision_cluster.ca_file - cert_file = step.provision_cluster.cert_file - key_file = step.provision_cluster.key_file - nomad_token = step.provision_cluster.nomad_token + nomad_addr = step.provision_cluster.nomad_addr + ca_file = step.provision_cluster.ca_file + cert_file = step.provision_cluster.cert_file + key_file = step.provision_cluster.key_file + nomad_token = step.provision_cluster.nomad_token + cluster_name = local.cluster_name + workloads = { + # deploy these first + csi_plugin_ebs_controller = { + job_spec = "jobs/plugin-aws-ebs-controller.nomad.hcl" + alloc_count = 2 + type = "service" + } + csi_plugin_ebs_node = { + job_spec = "jobs/plugin-aws-ebs-nodes.nomad.hcl" + alloc_count = 0 + type = "system" + post_script = "scripts/wait_for_ebs_plugin.sh" + } + service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3, type = "service" } service_docker = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3, type = "service" } system_docker = { job_spec = "jobs/docker-system.nomad.hcl", alloc_count = 0, type = "system" } batch_docker = { job_spec = "jobs/docker-batch.nomad.hcl", alloc_count = 3, type = "batch" } batch_raw_exec = { job_spec = "jobs/raw-exec-batch.nomad.hcl", alloc_count = 3, type = "batch" } system_raw_exec = { job_spec = "jobs/raw-exec-system.nomad.hcl", alloc_count = 0, type = "system" } + wants_csi = { job_spec = "jobs/wants-volume.nomad.hcl", alloc_count = 1, type = "service" } # the test volume is single-node-writer, so only one alloc can claim it + } } diff --git a/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl b/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl new file mode 100644 index 
00000000000..0e7bbbf2bb0 --- /dev/null +++ b/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl @@ -0,0 +1,45 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 +variable "alloc_count" { + type = number + default = 2 +} + +job "plugin-aws-ebs-controller" { + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "controller" { + + count = var.alloc_count + + task "plugin" { + driver = "docker" + + config { + image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0" + + args = [ + "controller", + "--endpoint=${CSI_ENDPOINT}", + "--logtostderr", + "--v=5", + ] + } + + csi_plugin { + id = "aws-ebs0" + type = "controller" + mount_dir = "/csi" + } + + resources { + cpu = 100 + memory = 256 + } + } + } +} diff --git a/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl b/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl new file mode 100644 index 00000000000..a8cbf2a138d --- /dev/null +++ b/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl @@ -0,0 +1,48 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +# this variable is not used by the job, but the run_workloads module always passes -var alloc_count +variable "alloc_count" { + type = number + default = 1 +} + +job "plugin-aws-ebs-nodes" { + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + type = "system" + + group "nodes" { + task "plugin" { + driver = "docker" + + config { + image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0" + + args = [ + "node", + "--endpoint=${CSI_ENDPOINT}", + "--logtostderr", + "--v=5", + ] + + privileged = true + } + + csi_plugin { + id = "aws-ebs0" + type = "node" + mount_dir = "/csi" + } + + resources { + cpu = 100 + memory = 256 + } + } + } +} diff --git a/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl b/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl new file mode 100644 index 00000000000..75fe9da7cf9 --- /dev/null +++ b/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl @@ -0,0 +1,75 @@ +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1 + +variable "alloc_count" { + type = number + default = 1 +} + +# a job that mounts an EBS volume; its poststart "sidecar" task writes the allocation ID to index.html in that task's directory +job "wants-ebs-volume" { + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "group" { + count = var.alloc_count + + volume "test" { + type = "csi" + source = "ebsTestVolume" + attachment_mode = "file-system" + access_mode = "single-node-writer" + } + + task "task" { + driver = "docker" + + config { + image = "busybox:1" + command = "httpd" + args = ["-vv", "-f", "-p", "8001", "-h", "/local"] + } + + volume_mount { + volume = "test" + destination = "${NOMAD_TASK_DIR}/test" + read_only = false + } + + resources { + cpu = 100 + memory = 64 + } + } + + task "sidecar" { + driver = "docker" + + config { + image = "busybox:1" + command = "/bin/sh" + args = ["-c", "echo '${NOMAD_ALLOC_ID}' > ${NOMAD_TASK_DIR}/index.html"] + } + + lifecycle { + hook = "poststart" + sidecar = false + } + + volume_mount { + volume = "test" + destination = "${NOMAD_TASK_DIR}/test" + read_only = false + } + + resources { + cpu = 10 + memory = 10 + } + + } + } +} diff --git a/enos/modules/run_workloads/main.tf b/enos/modules/run_workloads/main.tf index e3e3fec73b5..7bfa3a6e426 100644 --- a/enos/modules/run_workloads/main.tf +++ b/enos/modules/run_workloads/main.tf @@ -16,12 +16,31 @@ locals { NOMAD_CLIENT_CERT = var.cert_file NOMAD_CLIENT_KEY = var.key_file NOMAD_TOKEN = var.nomad_token + CLUSTER_NAME = var.cluster_name } system_job_count = length({ for k, v in var.workloads : k => v if v.type == "system" }) service_batch_allocs = sum([for wl in var.workloads : wl.alloc_count]) } +# look up the server0 instance so the test volume can be created in its availability zone +data "aws_instance" "server0" { + filter { + name = "tag:Name" + values = ["${var.cluster_name}-server0"] + } +} + +# test volume we'll register for the CSI workload +resource "aws_ebs_volume" "test_volume" { + availability_zone = 
data.aws_instance.server0.availability_zone + size = 10 + + tags = { + Cluster = var.cluster_name + } +} + resource "enos_local_exec" "wait_for_nomad_api" { environment = local.nomad_env @@ -52,5 +71,8 @@ resource "enos_local_exec" "workloads" { environment = local.nomad_env - inline = ["nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}"] + inline = [ + "nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}", + (each.value.post_script != null && each.value.post_script != "") ? abspath("${path.module}/${each.value.post_script}") : "echo ok" # post_script is optional (null when unset) and is resolved relative to this module, like job_spec + ] } diff --git a/enos/modules/run_workloads/scripts/volume.hcl.tpl b/enos/modules/run_workloads/scripts/volume.hcl.tpl new file mode 100644 index 00000000000..99ee051fb71 --- /dev/null +++ b/enos/modules/run_workloads/scripts/volume.hcl.tpl @@ -0,0 +1,13 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +type = "csi" +id = "ebsTestVolume" +name = "IDEMPOTENCY_TOKEN" +external_id = "EXTERNAL_ID" +plugin_id = "aws-ebs0" + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} diff --git a/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh b/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh new file mode 100755 index 00000000000..193a1327623 --- /dev/null +++ b/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1 + +set -euo pipefail + +# note: it can take a very long time for plugins to come up +TIMEOUT=60 +INTERVAL=2 +last_error= +start_time=$(date +%s) + +checkPlugin() { # succeeds once plugin aws-ebs0 reports 2 healthy controllers and at least 1 healthy node; records the reason in last_error otherwise + local pluginStatus foundControllers foundNodes + pluginStatus=$(nomad plugin status aws-ebs0) || { + last_error="could not read CSI plugin status" + return 1 + } + + foundControllers=$(echo "$pluginStatus" | awk -F'= +' '/Controllers Healthy/{print $2}') + if [[ "$foundControllers" != 2 ]]; then + echo "$foundControllers" + last_error="expected plugin to have 2 healthy controllers, found $foundControllers" + return 1 + fi + + foundNodes=$(echo "$pluginStatus" | awk -F'= +' '/Nodes Healthy/{print $2}') + if [[ "${foundNodes:-0}" == 0 ]]; then + last_error="expected plugin to have at least 1 healthy nodes, found none" + return 1 + fi + return 0 +} + +registerVolume() { # looks up the EBS volume tagged with this cluster's name and registers it with Nomad from volume.hcl.tpl + local externalID idempotencyToken + idempotencyToken=$(uuidgen) + + externalID=$(aws ec2 describe-volumes \ + --filters "Name=tag:Cluster,Values=$CLUSTER_NAME" \ + --output=json | jq -er '.Volumes[0].VolumeId') || { + echo "Could not find volume for $CLUSTER_NAME" + exit 1 + } + + sed -e "s/IDEMPOTENCY_TOKEN/$idempotencyToken/" \ + -e "s/EXTERNAL_ID/$externalID/" \ + "$(dirname "${BASH_SOURCE[0]}")/volume.hcl.tpl" | nomad volume register - +} + +while : +do + checkPlugin && break + + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + if [ "$elapsed_time" -ge "$TIMEOUT" ]; then + echo "Error: CSI plugin did not become available within $TIMEOUT seconds: $last_error" 
+ exit 1 + fi + + sleep "$INTERVAL" # pause before checking the plugin status again +done + +registerVolume # only reached after checkPlugin has succeeded +nomad volume status -type csi diff --git a/enos/modules/run_workloads/variables.tf b/enos/modules/run_workloads/variables.tf index 6bd96875ec3..f1bf1cf4ecb 100644 --- a/enos/modules/run_workloads/variables.tf +++ b/enos/modules/run_workloads/variables.tf @@ -35,5 +35,11 @@ variable "workloads" { job_spec = string alloc_count = number type = string + post_script = optional(string) })) } + +variable "cluster_name" { # also exported to the workload commands' environment as CLUSTER_NAME + description = "The name of the cluster, which we need to find the AZ" + type = string +}