From 71fe537a8ca5703020e84d4ee6c5800a49fa63f9 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Tue, 25 Feb 2025 15:20:09 -0500
Subject: [PATCH] upgrade tests: add CSI workload

Add an upgrade test workload for CSI with the AWS EBS plugin. In order to
validate this workload, we'll need to deploy the plugin jobs and then create
volumes with them. So this extends the `run_workloads` module to allow for a
"wait script" to be run after a given job has been deployed. We can use that as
a model for other test workloads.

Ref: https://hashicorp.atlassian.net/browse/NET-12217
---
 .../jobs/plugin-aws-ebs-controller.nomad.hcl  | 45 +++++++++++
 .../jobs/plugin-aws-ebs-nodes.nomad.hcl       | 49 ++++++++++++
 .../run_workloads/jobs/wants-volume.nomad.hcl | 76 ++++++++++++++++++
 enos/modules/run_workloads/main.tf            |  5 +-
 .../run_workloads/scripts/volume.hcl.tpl      | 34 ++++++++
 .../scripts/wait_for_ebs_plugin.sh            | 78 +++++++++++++++++++
 enos/modules/run_workloads/variables.tf       |  7 ++
 7 files changed, 293 insertions(+), 1 deletion(-)
 create mode 100644 enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl
 create mode 100644 enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl
 create mode 100644 enos/modules/run_workloads/jobs/wants-volume.nomad.hcl
 create mode 100644 enos/modules/run_workloads/scripts/volume.hcl.tpl
 create mode 100755 enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh

diff --git a/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl b/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl
new file mode 100644
index 00000000000..0e7bbbf2bb0
--- /dev/null
+++ b/enos/modules/run_workloads/jobs/plugin-aws-ebs-controller.nomad.hcl
@@ -0,0 +1,45 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+variable "alloc_count" {
+  type    = number
+  default = 2
+}
+
+job "plugin-aws-ebs-controller" {
+
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
+  group "controller" {
+
+    count = var.alloc_count # wait_for_ebs_plugin.sh waits for exactly 2 healthy controllers
+
+    task "plugin" {
+      driver = "docker"
+
+      config {
+        image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0"
+
+        args = [
+          "controller",
+          "--endpoint=${CSI_ENDPOINT}",
+          "--logtostderr",
+          "--v=5",
+        ]
+      }
+
+      csi_plugin {
+        id        = "aws-ebs0" # plugin ID shared with the node job and volume.hcl.tpl
+        type      = "controller"
+        mount_dir = "/csi"
+      }
+
+      resources {
+        cpu    = 100
+        memory = 256
+      }
+    }
+  }
+}
diff --git a/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl b/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl
new file mode 100644
index 00000000000..a8cbf2a138d
--- /dev/null
+++ b/enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl
@@ -0,0 +1,49 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+# not used by this system job, but declared because the run_workloads module
+# passes `-var alloc_count=...` to every job it runs
+variable "alloc_count" {
+  type    = number
+  default = 1
+}
+
+job "plugin-aws-ebs-nodes" {
+
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
+  type = "system"
+
+  group "nodes" {
+    task "plugin" {
+      driver = "docker"
+
+      config {
+        image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0"
+
+        args = [
+          "node",
+          "--endpoint=${CSI_ENDPOINT}",
+          "--logtostderr",
+          "--v=5",
+        ]
+
+        privileged = true
+      }
+
+      csi_plugin {
+        id        = "aws-ebs0" # must match the controller job's plugin ID
+        type      = "node"
+        mount_dir = "/csi"
+      }
+
+      resources {
+        cpu    = 100
+        memory = 256
+      }
+    }
+  }
+}
diff --git a/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl b/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl
new file mode 100644
index 00000000000..4dd0b9d73d8
--- /dev/null
+++ b/enos/modules/run_workloads/jobs/wants-volume.nomad.hcl
@@ -0,0 +1,76 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+variable "alloc_count" {
+  type    = number
+  default = 2
+}
+
+# a job that mounts an EBS volume and writes its alloc ID as a file
+job "wants-ebs-volume" {
+
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
+  group "group" {
+    count = var.alloc_count
+
+    volume "test" {
+      type            = "csi"
+      source          = "ebsVolume"
+      attachment_mode = "file-system"
+      access_mode     = "single-node-writer"
+      per_alloc       = true # each alloc claims "ebsVolume[<alloc index>]", created by wait_for_ebs_plugin.sh
+    }
+
+    task "task" {
+      driver = "docker"
+
+      config {
+        image   = "busybox:1"
+        command = "httpd"
+        args    = ["-vv", "-f", "-p", "8001", "-h", "/local"]
+      }
+
+      volume_mount {
+        volume      = "test"
+        destination = "${NOMAD_TASK_DIR}/test"
+        read_only   = false
+      }
+
+      resources {
+        cpu    = 100
+        memory = 64
+      }
+    }
+
+    task "sidecar" {
+      driver = "docker"
+
+      config {
+        image   = "busybox:1"
+        command = "/bin/sh"
+        args    = ["-c", "echo '${NOMAD_ALLOC_ID}' > ${NOMAD_TASK_DIR}/index.html"]
+      }
+
+      lifecycle {
+        hook    = "poststart"
+        sidecar = false # one-shot command: a long-lived sidecar would be restarted every time it exits
+      }
+
+      volume_mount {
+        volume      = "test"
+        destination = "${NOMAD_TASK_DIR}/test"
+        read_only   = false
+      }
+
+      resources {
+        cpu    = 10
+        memory = 10
+      }
+
+    }
+  }
+}
diff --git a/enos/modules/run_workloads/main.tf b/enos/modules/run_workloads/main.tf
index 766e0066896..dc63b4f2a00 100644
--- a/enos/modules/run_workloads/main.tf
+++ b/enos/modules/run_workloads/main.tf
@@ -28,5 +28,8 @@ resource "enos_local_exec" "workloads" {
 
   environment = local.nomad_env
 
-  inline = ["nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}"]
+  inline = [ # the optional wait script is resolved relative to this module, like job_spec
+    "nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}",
+    each.value.wait_script != null && each.value.wait_script != "" ? "${path.module}/${each.value.wait_script}" : "echo ok"
+  ]
 }
diff --git a/enos/modules/run_workloads/scripts/volume.hcl.tpl b/enos/modules/run_workloads/scripts/volume.hcl.tpl
new file mode 100644
index 00000000000..12e2e1bbb4a
--- /dev/null
+++ b/enos/modules/run_workloads/scripts/volume.hcl.tpl
@@ -0,0 +1,34 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+id        = "VOLUME_ID"
+name      = "IDEMPOTENCY_TOKEN" # CSIVolumeName tag, must be unique and idempotent
+type      = "csi"
+plugin_id = "aws-ebs0"
+
+capacity_min = "10GiB"
+capacity_max = "20GiB"
+
+capability {
+  access_mode     = "single-node-writer"
+  attachment_mode = "file-system"
+}
+
+capability {
+  access_mode     = "single-node-writer"
+  attachment_mode = "block-device"
+}
+
+parameters {
+  type = "gp2"
+}
+
+topology_request {
+  required {
+    topology {
+      segments {
+        "topology.ebs.csi.aws.com/zone" = "AWS_ZONE"
+      }
+    }
+  }
+}
diff --git a/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh b/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh
new file mode 100755
index 00000000000..5cb20b77875
--- /dev/null
+++ b/enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+set -euo pipefail
+
+# note: it can take a very long time for plugins to come up
+TIMEOUT=60  # seconds to wait for the plugin before giving up
+INTERVAL=2  # seconds between plugin status checks
+last_error= # most recent reason checkPlugin failed; reported on timeout
+start_time=$(date +%s)
+# checkPlugin succeeds once aws-ebs0 reports 2 healthy controllers and a non-zero healthy node count
+checkPlugin() {
+    local pluginStatus foundControllers foundNodes
+    pluginStatus=$(nomad plugin status aws-ebs0) || {
+        last_error="could not read CSI plugin status"
+        return 1
+    }
+
+    foundControllers=$(echo "$pluginStatus" | awk -F'= +' '/Controllers Healthy/{print $2}')
+    if [[ "$foundControllers" != 2 ]]; then
+        echo "$foundControllers"
+        last_error="expected plugin to have 2 healthy controllers, found $foundControllers"
+        return 1
+    fi
+
+    foundNodes=$(echo "$pluginStatus" | awk -F'= +' '/Nodes Healthy/{print $2}')
+    if [[ "$foundNodes" == 0 ]]; then
+        last_error="expected plugin to have at least 1 healthy nodes, found none"
+        return 1
+    fi
+    return 0
+}
+
+awsZone= # set by findZone, substituted into the volume spec by createVolume
+# findZone sets awsZone to the availability zone attribute of the first node listed by `nomad node status`
+findZone() {
+    local firstNodeID
+    firstNodeID=$(nomad node status -json | jq -e -r '.[0].ID') || {
+        echo "could not read node status"
+        exit 1
+    }
+    awsZone=$(nomad node status -json "$firstNodeID" | jq -e -r '.Attributes."platform.aws.placement.availability-zone"') || {
+        echo "could not read AWS zone from node status"
+        exit 1
+    }
+}
+# createVolume <index> renders volume.hcl.tpl (next to this script) as ebsVolume[<index>] and creates it
+createVolume() {
+    local volumeID idempotencyToken
+    volumeID=$1
+    idempotencyToken=$(uuidgen)
+
+    sed -e "s/VOLUME_ID/ebsVolume[$volumeID]/" \
+        -e "s/IDEMPOTENCY_TOKEN/$idempotencyToken/" \
+        -e "s/AWS_ZONE/$awsZone/" \
+        "$(dirname "${BASH_SOURCE[0]}")/volume.hcl.tpl" | nomad volume create -
+}
+# poll until the plugin is healthy or TIMEOUT seconds have elapsed
+while :
+do
+    checkPlugin && break
+
+    current_time=$(date +%s)
+    elapsed_time=$((current_time - start_time))
+    if [ "$elapsed_time" -ge "$TIMEOUT" ]; then
+        echo "Error: CSI plugin did not become available within $TIMEOUT seconds: $last_error"
+        exit 1
+    fi
+
+    sleep "$INTERVAL"
+done
+
+findZone
+createVolume 0
+createVolume 1
+
+nomad volume status -type csi
diff --git a/enos/modules/run_workloads/variables.tf b/enos/modules/run_workloads/variables.tf
index 6281d988c32..8faf09a2bb7 100644
--- a/enos/modules/run_workloads/variables.tf
+++ b/enos/modules/run_workloads/variables.tf
@@ -34,10 +34,17 @@ variable "workloads" {
   type = map(object({
     job_spec    = string
     alloc_count = number
+    wait_script = optional(string, "") # script (relative to this module) run after the job is submitted
   }))
 
   default = {
+    # CSI plugins: the wait script blocks until the plugin is healthy, then creates the volumes for wants_csi
+    csi_plugin_ebs_controller = { job_spec = "jobs/plugin-aws-ebs-controller.nomad.hcl", alloc_count = 2 }
+    csi_plugin_ebs_node       = { job_spec = "jobs/plugin-aws-ebs-nodes.nomad.hcl", alloc_count = 0, wait_script = "scripts/wait_for_ebs_plugin.sh" }
+
     service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3 }
     service_docker   = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3 }
+    wants_csi        = { job_spec = "jobs/wants-volume.nomad.hcl", alloc_count = 2 }
+
   }
 }