Skip to content

Commit

Permalink
upgrade tests: add CSI workload
Browse files Browse the repository at this point in the history
Add an upgrade test workload for CSI with the AWS EBS plugin. In order to
validate this workload, we'll need to deploy the plugin jobs and then create
volumes with them. So this extends the `run_workloads` module to allow for a
"wait script" to be run after a given job has been deployed. We can use that as
a model for other test workloads.

Ref: https://hashicorp.atlassian.net/browse/NET-12217
  • Loading branch information
tgross committed Feb 25, 2025
1 parent 4693f0b commit 71fe537
Show file tree
Hide file tree
Showing 7 changed files with 286 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# Number of controller plugin allocations to run; used below as the
# "controller" group count.
variable "alloc_count" {
type = number
default = 2
}

# Runs the AWS EBS CSI driver container in "controller" mode and registers it
# as the controller side of the CSI plugin with ID "aws-ebs0".
job "plugin-aws-ebs-controller" {

# only place on Linux clients
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "controller" {

# number of controller instances, taken from the alloc_count variable
count = var.alloc_count

task "plugin" {
driver = "docker"

config {
image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0"

# "controller" selects the driver's controller mode.
# NOTE(review): CSI_ENDPOINT is presumably provided to the task by Nomad
# because of the csi_plugin block below -- confirm against the Nomad docs.
args = [
"controller",
"--endpoint=${CSI_ENDPOINT}",
"--logtostderr",
"--v=5",
]
}

# the plugin ID must match the node plugin job and the volume specification
csi_plugin {
id = "aws-ebs0"
type = "controller"
mount_dir = "/csi"
}

resources {
cpu = 100
memory = 256
}
}
}
}
42 changes: 42 additions & 0 deletions enos/modules/run_workloads/jobs/plugin-aws-ebs-nodes.nomad.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# The run_workloads module submits every job with `-var alloc_count=<n>`, and
# Nomad's HCL2 parser rejects -var values for undeclared variables by default.
# A system job runs on every eligible client, so the value is intentionally
# unused here; the declaration only keeps the job submittable.
variable "alloc_count" {
  type    = number
  default = 1
}

# Runs the AWS EBS CSI driver container in "node" mode on every Linux client
# and registers it as the node side of the CSI plugin with ID "aws-ebs0".
job "plugin-aws-ebs-nodes" {

  # only place on Linux clients
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  type = "system"

  group "nodes" {
    task "plugin" {
      driver = "docker"

      config {
        image = "public.ecr.aws/ebs-csi-driver/aws-ebs-csi-driver:v1.33.0"

        # "node" selects the driver's node mode
        args = [
          "node",
          "--endpoint=${CSI_ENDPOINT}",
          "--logtostderr",
          "--v=5",
        ]

        # NOTE(review): presumably required so the plugin can mount volumes
        # on the host -- confirm against the Nomad CSI documentation.
        privileged = true
      }

      # the plugin ID must match the controller plugin job and the volume
      # specification
      csi_plugin {
        id        = "aws-ebs0"
        type      = "node"
        mount_dir = "/csi"
      }

      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}
76 changes: 76 additions & 0 deletions enos/modules/run_workloads/jobs/wants-volume.nomad.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Number of allocations to run; used below as the "group" count. The group's
# volume is claimed per allocation (per_alloc = true).
variable "alloc_count" {
type = number
default = 2
}

# a job that mounts an EBS volume and writes its allocation ID as a file
job "wants-ebs-volume" {

# only place on Linux clients
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group" {
count = var.alloc_count

# CSI volume request. With per_alloc = true each allocation claims its own
# volume, named by appending the allocation index to the source, e.g.
# "ebsVolume[0]" and "ebsVolume[1]".
volume "test" {
type = "csi"
source = "ebsVolume"
attachment_mode = "file-system"
access_mode = "single-node-writer"
per_alloc = true
}

# main task: busybox httpd in the foreground on port 8001, serving /local
task "task" {
driver = "docker"

config {
image = "busybox:1"
command = "httpd"
args = ["-vv", "-f", "-p", "8001", "-h", "/local"]
}

# mount the CSI volume read-write under the task directory
volume_mount {
volume = "test"
destination = "${NOMAD_TASK_DIR}/test"
read_only = false
}

resources {
cpu = 100
memory = 64
}
}

# poststart task that writes the allocation ID to index.html
task "sidecar" {
driver = "docker"

# NOTE(review): index.html is written to ${NOMAD_TASK_DIR} itself, not to the
# volume mounted at ${NOMAD_TASK_DIR}/test, and presumably into this task's
# own task directory rather than the one httpd serves -- confirm this is
# the intended location.
config {
image = "busybox:1"
command = "/bin/sh"
args = ["-c", "echo '${NOMAD_ALLOC_ID}' > ${NOMAD_TASK_DIR}/index.html"]
}

# NOTE(review): the command exits right after writing the file, yet the task
# is declared as a long-running sidecar -- confirm that sidecar = true is
# intended.
lifecycle {
hook = "poststart"
sidecar = true
}

# same CSI volume, mounted read-write under this task's directory
volume_mount {
volume = "test"
destination = "${NOMAD_TASK_DIR}/test"
read_only = false
}

resources {
cpu = 10
memory = 10
}

}
}
}
5 changes: 4 additions & 1 deletion enos/modules/run_workloads/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,8 @@ resource "enos_local_exec" "workloads" {

environment = local.nomad_env

inline = ["nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}"]
# Submit the job, then run its optional wait script. wait_script, like
# job_spec, is a path relative to this module, so it is resolved against
# path.module and made absolute; otherwise the script would only be found
# when the command happens to run from the module directory.
inline = [
  "nomad job run -var alloc_count=${each.value.alloc_count} ${path.module}/${each.value.job_spec}",
  each.value.wait_script != "" ? abspath("${path.module}/${each.value.wait_script}") : "echo ok"
]
}
34 changes: 34 additions & 0 deletions enos/modules/run_workloads/scripts/volume.hcl.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Volume specification template. The upper-case placeholder values below are
# substituted by wait_for_ebs_plugin.sh before the result is passed to
# `nomad volume create`.
id = "VOLUME_ID"
name = "IDEMPOTENCY_TOKEN" # CSIVolumeName tag, must be unique and idempotent
type = "csi"
# must match the csi_plugin id used by the plugin jobs
plugin_id = "aws-ebs0"

# requested size range for the new volume
capacity_min = "10GiB"
capacity_max = "20GiB"

# capability the wants-ebs-volume job requests (file-system, single writer)
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}

# additionally allow attaching the volume as a raw block device
capability {
access_mode = "single-node-writer"
attachment_mode = "block-device"
}

# plugin-specific parameters; "gp2" is presumably the EBS volume type
parameters {
type = "gp2"
}

# restrict the volume to the availability zone chosen by the wait script
topology_request {
required {
topology {
segments {
"topology.ebs.csi.aws.com/zone" = "AWS_ZONE"
}
}
}
}
78 changes: 78 additions & 0 deletions enos/modules/run_workloads/scripts/wait_for_ebs_plugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# exit on command failure, on use of unset variables, and when any stage of a
# pipeline fails
set -euo pipefail

# note: it can take a very long time for plugins to come up
# TIMEOUT: maximum number of seconds to wait for the plugin to become healthy
TIMEOUT=60
# INTERVAL: seconds to sleep between plugin health checks
INTERVAL=2
# last_error: reason for the most recent failed check, written by checkPlugin
last_error=
# start_time: epoch seconds at script start, used to compute the elapsed time
start_time=$(date +%s)

#######################################
# Check whether the aws-ebs0 CSI plugin is healthy, based on the output of
# `nomad plugin status aws-ebs0`.
# Globals:   last_error (written) - reason for the most recent failure
# Arguments: $1 - number of healthy controllers required (optional,
#                 default 2)
# Outputs:   nothing on stdout
# Returns:   0 if the expected number of controllers and at least one node
#            are healthy, 1 otherwise
#######################################
checkPlugin() {
  local expectedControllers=${1:-2}
  local pluginStatus foundControllers foundNodes

  pluginStatus=$(nomad plugin status aws-ebs0) || {
    last_error="could not read CSI plugin status"
    return 1
  }

  # status lines look like "Controllers Healthy  = 2"; take the value after
  # the "= " separator
  foundControllers=$(awk -F'= +' '/Controllers Healthy/{print $2}' <<<"$pluginStatus")
  if [[ "$foundControllers" != "$expectedControllers" ]]; then
    last_error="expected plugin to have $expectedControllers healthy controllers, found ${foundControllers:-none}"
    return 1
  fi

  foundNodes=$(awk -F'= +' '/Nodes Healthy/{print $2}' <<<"$pluginStatus")
  # a missing or non-numeric count means the status could not be parsed;
  # treat that the same as zero healthy nodes instead of passing the check
  if [[ ! "$foundNodes" =~ ^[0-9]+$ ]] || ((10#$foundNodes == 0)); then
    last_error="expected plugin to have at least 1 healthy nodes, found none"
    return 1
  fi
  return 0
}

# AWS availability zone of the first node in the node list; set by findZone
awsZone=

#######################################
# Look up the AWS availability zone of the first node returned by
# `nomad node status -json` and store it in awsZone.
# Globals:   awsZone (written)
# Arguments: none
# Outputs:   error messages on stderr
# Returns:   0 on success; exits the script with status 1 if the node list
#            or the zone attribute cannot be read
#######################################
findZone() {
  local firstNodeID
  firstNodeID=$(nomad node status -json | jq -r '.[0].ID') || {
    echo "could not read node status" >&2
    exit 1
  }
  # jq -r prints the string "null" (and still exits 0) when the list is empty
  if [[ -z "$firstNodeID" || "$firstNodeID" == "null" ]]; then
    echo "could not find any nodes in node status" >&2
    exit 1
  fi

  awsZone=$(nomad node status -json "$firstNodeID" | jq -r '.Attributes."platform.aws.placement.availability-zone"') || {
    echo "could not read AWS zone from node status" >&2
    exit 1
  }
  # same "null" caveat: the attribute is absent on nodes without AWS
  # placement information
  if [[ -z "$awsZone" || "$awsZone" == "null" ]]; then
    echo "could not read AWS zone from node status: node $firstNodeID has no availability zone attribute" >&2
    exit 1
  fi
}

# Directory containing this script, resolved once at load time. The volume
# template sits next to the script, so it must be located relative to the
# script rather than to the caller's working directory.
scriptDir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

#######################################
# Render volume.hcl.tpl for one volume and create it with
# `nomad volume create`.
# Globals:   awsZone (read), scriptDir (read)
# Arguments: $1 - volume index; the volume ID becomes "ebsVolume[<index>]"
# Outputs:   whatever `nomad volume create` prints
# Returns:   exit status of the sed | nomad pipeline
#######################################
createVolume() {
  local volumeID idempotencyToken
  volumeID=$1
  # a fresh token per call is used as the volume name
  idempotencyToken=$(uuidgen)

  sed -e "s/VOLUME_ID/ebsVolume[$volumeID]/" \
    -e "s/IDEMPOTENCY_TOKEN/$idempotencyToken/" \
    -e "s/AWS_ZONE/$awsZone/" \
    "$scriptDir/volume.hcl.tpl" | nomad volume create -
}

# Poll the plugin until it is healthy, giving up after TIMEOUT seconds.
while :; do
  checkPlugin && break

  current_time=$(date +%s)
  elapsed_time=$((current_time - start_time))
  if [[ "$elapsed_time" -ge "$TIMEOUT" ]]; then
    # include the reason recorded by the last failed check so the failure
    # can be diagnosed from the test output
    echo "Error: CSI plugin did not become available within $TIMEOUT seconds: $last_error" >&2
    exit 1
  fi

  sleep "$INTERVAL"
done

# The plugin is healthy: create one volume per allocation of the job that
# consumes them (indexes 0 and 1), in the zone of the first node.
findZone
createVolume 0
createVolume 1

# show the resulting volumes in the output
nomad volume status -type csi
7 changes: 7 additions & 0 deletions enos/modules/run_workloads/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,17 @@ variable "workloads" {
# job_spec:    path of the job specification, relative to this module
# alloc_count: passed to the job as `-var alloc_count=<n>`, so every job
#              spec listed here must declare an alloc_count variable
# wait_script: optional path, relative to this module, of a script to run
#              after the job has been submitted; empty means no script
type = map(object({
  job_spec    = string
  alloc_count = number
  wait_script = optional(string, "")
}))

default = {
  # deploy these first
  csi_plugin_ebs_controller = { job_spec = "jobs/plugin-aws-ebs-controller.nomad.hcl", alloc_count = 2 }
  csi_plugin_ebs_node       = { job_spec = "jobs/plugin-aws-ebs-nodes.nomad.hcl", alloc_count = 0, wait_script = "scripts/wait_for_ebs_plugin.sh" }

  service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3 }
  service_docker   = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3 }
  wants_csi        = { job_spec = "jobs/wants-volume.nomad.hcl", alloc_count = 2 }

}
}

0 comments on commit 71fe537

Please sign in to comment.