Patch: make the low-disk cleanup threshold and the docker prune age configurable.

Summary of the changes below:
  * bk-check-disk-space.sh takes the minimum free disk (a byte count, a unit
    such as 10G/500M, or a percentage such as 5%) and the minimum free inodes
    as positional arguments instead of environment variables.
  * bk-parse-byte-units.sh (new) converts unit strings such as 45K or 1.5gb to
    bytes; unit-tests/parse-byte-units.bats covers it.
  * bk-install-elastic-stack.sh writes DISK_MIN_AVAILABLE and DOCKER_PRUNE_UNTIL
    to /var/lib/buildkite-agent/cron-env; the environment hook and the hourly
    docker cron scripts source that file when it exists.
  * templates/aws-stack.yml gains the MinimumDiskAvailableBeforeCleanup and
    DockerPruneUntil parameters and passes them to the install script.
  * The unit tests run in an image built from unit-tests/Dockerfile
    (bats-core v1.1.0 on amazonlinux:2).

diff --git a/docker-compose.unit-tests.yml b/docker-compose.unit-tests.yml
index 0c39813f7..5bcb0246f 100644
--- a/docker-compose.unit-tests.yml
+++ b/docker-compose.unit-tests.yml
@@ -2,8 +2,8 @@ version: '3'
 
 services:
   unit-tests:
-    image: lucor/bats
+    build: ./unit-tests
     volumes:
       - .:/src:ro
     working_dir: /src
-    command: bats /src/unit-tests/
\ No newline at end of file
+    command: bats /src/unit-tests/
diff --git a/packer/linux/conf/bin/bk-check-disk-space.sh b/packer/linux/conf/bin/bk-check-disk-space.sh
index 3c25f11ef..df88c8a40 100755
--- a/packer/linux/conf/bin/bk-check-disk-space.sh
+++ b/packer/linux/conf/bin/bk-check-disk-space.sh
@@ -1,25 +1,50 @@
 #!/bin/bash
 set -euo pipefail
 
-DISK_MIN_AVAILABLE=${DISK_MIN_AVAILABLE:-5242880} # 5GB
-DISK_MIN_INODES=${DISK_MIN_INODES:-250000} # docker needs lots
-
-DOCKER_DIR="/var/lib/docker/"
-
-disk_avail=$(df -k --output=avail "$DOCKER_DIR" | tail -n1)
-
-echo "Disk space free: $(df -k -h --output=avail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"
-
-if [[ $disk_avail -lt $DISK_MIN_AVAILABLE ]]; then
-  echo "Not enough disk space free, cutoff is ${DISK_MIN_AVAILABLE} 🚨" >&2
-  exit 1
+# Usage:
+#   bk-check-disk-space.sh (min disk required) (min inodes required)
+#   min disk required can be either an amount of bytes, a pattern like 10G
+#   or 500M, or a percentage like 5%
+#   min inodes must be a number, default to 250,000
+
+min_available=${1:-5G}
+docker_dir="/var/lib/docker/"
+
+# First check the disk available
+
+disk_avail=$(df -k --output=avail "$docker_dir" | tail -n1)
+disk_avail_human=$(df -k -h --output=avail "$docker_dir" | tail -n1 | tr -d '[:space:]')
+disk_used_pct=$(df -k --output=pcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%')
+disk_free_pct=$((100-disk_used_pct))
+
+printf "Disk space free: %s (%s%%)\\n" "$disk_avail_human" "$disk_free_pct"
+
+# Check if the min_available is a percentage
+if [[ $min_available =~ \%$ ]] ; then
+  if [[ $(echo "${disk_free_pct}<${min_available}" | sed 's/%//g' | bc) -gt 0 ]] ; then
+    echo "Not enough disk space free, cutoff percentage is ${min_available} 🚨" >&2
+    exit 1
+  fi
+else
+  disk_avail_bytes="$((disk_avail*1024))"
+  min_available_bytes="$(/usr/local/bin/bk-parse-byte-units.sh "$min_available")"
+  if [[ $disk_avail_bytes -lt $min_available_bytes ]]; then
+    echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2
+    exit 1
+  fi
 fi
 
-inodes_avail=$(df -k --output=iavail "$DOCKER_DIR" | tail -n1)
+# Next check inodes, these can be exhausted by docker build operations
+
+inodes_min_available=${2:-250000}
+inodes_avail=$(df -k --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')
+inodes_avail_human=$(df -k -h --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')
+inodes_used_pct=$(df -k --output=ipcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%')
+inodes_free_pct=$((100-inodes_used_pct))
 
-echo "Inodes free: $(df -k -h --output=iavail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"
+printf "Inodes free: %s (%s%%)\\n" "$inodes_avail_human" "$inodes_free_pct"
 
-if [[ $inodes_avail -lt $DISK_MIN_INODES ]]; then
-  echo "Not enough inodes free, cutoff is ${DISK_MIN_INODES} 🚨" >&2
+if [[ $inodes_avail -lt $inodes_min_available ]]; then
+  echo "Not enough inodes free, cutoff is ${inodes_min_available} 🚨" >&2
   exit 1
 fi
diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh
index 4c55c0cbf..982f11151 100755
--- a/packer/linux/conf/bin/bk-install-elastic-stack.sh
+++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh
@@ -62,6 +62,12 @@ export PLUGINS_ENABLED="${PLUGINS_ENABLED[*]-}"
 export BUILDKITE_ECR_POLICY=${BUILDKITE_ECR_POLICY:-none}
 EOF
 
+# cron-env is sourced by crontab entries and low disk scripts
+cat << EOF > /var/lib/buildkite-agent/cron-env
+export DISK_MIN_AVAILABLE=$DISK_MIN_AVAILABLE
+export DOCKER_PRUNE_UNTIL=$DOCKER_PRUNE_UNTIL
+EOF
+
 if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]] ; then
   echo "Downloading buildkite-agent edge..."
   curl -Lsf -o /usr/bin/buildkite-agent-edge \
diff --git a/packer/linux/conf/bin/bk-parse-byte-units.sh b/packer/linux/conf/bin/bk-parse-byte-units.sh
new file mode 100755
index 000000000..d68ba62f7
--- /dev/null
+++ b/packer/linux/conf/bin/bk-parse-byte-units.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Converts human-readable units like 1.43K and 120.3M to bytes
+
+if [[ ! "${1:-}" =~ ^[0-9] ]] ; then
+  printf "Invalid input, must start with a number: %q\n" "${1:-}" >&2
+  exit 1
+fi
+
+/usr/bin/awk \
+  '/[0-9][bB]?$/ {printf "%u\n", $1*1}
+   /[tT][bB]?$/ {printf "%u\n", $1*(1024*1024*1024*1024)}
+   /[gG][bB]?$/ {printf "%u\n", $1*(1024*1024*1024)}
+   /[mM][bB]?$/ {printf "%u\n", $1*(1024*1024)}
+   /[kK][bB]?$/ {printf "%u\n", $1*1024}' <<< "$1"
diff --git a/packer/linux/conf/buildkite-agent/hooks/environment b/packer/linux/conf/buildkite-agent/hooks/environment
index d5995e13f..b853dba19 100755
--- a/packer/linux/conf/buildkite-agent/hooks/environment
+++ b/packer/linux/conf/buildkite-agent/hooks/environment
@@ -7,6 +7,11 @@ source ~/cfn-env
 echo "~~~ :llama: Setting up elastic stack environment ($BUILDKITE_STACK_VERSION)"
 cat ~/cfn-env
 
+if [[ -f ~/cron-env ]] ; then
+  # shellcheck source=/dev/null
+  source ~/cron-env
+fi
+
 echo "Checking docker"
 if ! docker ps ; then
   echo "^^^ +++"
@@ -17,13 +22,13 @@ if ! docker ps ; then
 fi
 
 echo "Checking disk space"
-if ! /usr/local/bin/bk-check-disk-space.sh ; then
+if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then
 
   echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL:-4h}"
   docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}"
 
   echo "Checking disk space again"
-  if ! /usr/local/bin/bk-check-disk-space.sh ; then
+  if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}"; then
     echo "Disk health checks failed" >&2
     exit 1
   fi
diff --git a/packer/linux/conf/docker/cron.hourly/docker-gc b/packer/linux/conf/docker/cron.hourly/docker-gc
index 1ab07e68f..71f85ae0a 100755
--- a/packer/linux/conf/docker/cron.hourly/docker-gc
+++ b/packer/linux/conf/docker/cron.hourly/docker-gc
@@ -5,10 +5,16 @@ if [[ $EUID -eq 0 ]]; then
   exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
 fi
 
-DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h}
+# Load config from file if it exists
+if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
+  # shellcheck source=/dev/null
+  source /var/lib/buildkite-agent/cron-env
+else
+  DOCKER_PRUNE_UNTIL=4h
+fi
 
 ## ------------------------------------------
 ## Prune stuff that doesn't affect cache hits
 
 docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
 docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
diff --git a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc
index ff68b64dc..c0697c9c3 100644
--- a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc
+++ b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc
@@ -5,8 +5,6 @@ if [[ $EUID -eq 0 ]]; then
   exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
 fi
 
-DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-1h}
-
 mark_instance_unhealthy() {
   # cancel any running buildkite builds
   killall -QUIT buildkite-agent || true
@@ -19,14 +17,20 @@ mark_instance_unhealthy() {
 
 trap mark_instance_unhealthy ERR
 
+# Load config from file if it exists
+if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
+  # shellcheck source=/dev/null
+  source /var/lib/buildkite-agent/cron-env
+fi
+
 ## -----------------------------------------------------------------
 ## Check disk, we only want to prune images/containers if we have to
 
-if ! /usr/local/bin/bk-check-disk-space.sh ; then
-  echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL}"
-  docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL}"
+if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then
+  echo "Cleaning up docker resources older than 1h"
+  docker image prune --all --force --filter "until=1h"
 
-  if ! /usr/local/bin/bk-check-disk-space.sh ; then
+  if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then
     echo "Disk health checks failed" >&2
     exit 1
   fi
diff --git a/templates/aws-stack.yml b/templates/aws-stack.yml
index f6855d9fb..12de96435 100644
--- a/templates/aws-stack.yml
+++ b/templates/aws-stack.yml
@@ -375,6 +375,16 @@ Parameters:
       - "false"
     Default: "false"
 
+  MinimumDiskAvailableBeforeCleanup:
+    Type: String
+    Description: Either a percentage (%) or absolute unit (B, MB, GB) of disk below which disk cleanup is run
+    Default: "2GB"
+
+  DockerPruneUntil:
+    Type: String
+    Description: How far back to prune docker networks images and containers on hourly cleanup
+    Default: "4h"
+
 Outputs:
   VpcId:
     Value:
@@ -857,6 +867,8 @@ Resources:
                   BUILDKITE_ECR_POLICY=${ECRAccessPolicy} \
                   BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB=${BuildkiteTerminateInstanceAfterJob} \
                   BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS=${BuildkiteAdditionalSudoPermissions} \
+                  DISK_MIN_AVAILABLE="${MinimumDiskAvailableBeforeCleanup}" \
+                  DOCKER_PRUNE_UNTIL="${DockerPruneUntil}" \
                   AWS_DEFAULT_REGION=${AWS::Region} \
                   SECRETS_PLUGIN_ENABLED=${EnableSecretsPlugin} \
                   ECR_PLUGIN_ENABLED=${EnableECRPlugin} \
diff --git a/unit-tests/Dockerfile b/unit-tests/Dockerfile
new file mode 100644
index 000000000..306a96ed2
--- /dev/null
+++ b/unit-tests/Dockerfile
@@ -0,0 +1,11 @@
+FROM amazonlinux:2
+
+RUN yum install -y git
+
+RUN git clone https://github.com/bats-core/bats-core.git \
+  && cd bats-core \
+  && git checkout v1.1.0 \
+  && ./install.sh /usr/local
+
+# Exec-form CMD is not run through a shell, so a "*.bats" glob would be passed literally; give bats the directory.
+CMD [ "bash", "/usr/local/bin/bats", "/src/unit-tests/" ]
diff --git a/unit-tests/fix-buildkite-agent-builds-permissions.bats b/unit-tests/fix-buildkite-agent-builds-permissions.bats
index 4a4af13e9..cd450472b 100644
--- a/unit-tests/fix-buildkite-agent-builds-permissions.bats
+++ b/unit-tests/fix-buildkite-agent-builds-permissions.bats
@@ -5,29 +5,14 @@ FIX_PERMISSIONS_SCRIPT="/src/packer/linux/conf/buildkite-agent/scripts/fix-build
 @test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "/" "abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc/" "abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "/abc" "abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc/def" "abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc/def/ghi" "abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the agent arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "/abc/" "abc" "abc"
   [ "$status" -eq 1 ]
 }
@@ -35,29 +20,14 @@ FIX_PERMISSIONS_SCRIPT="/src/packer/linux/conf/buildkite-agent/scripts/fix-build
 @test "Slashes in the org arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "/" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the org arg cause an exit 1" {
-  run "$FIX_PERMISSIONS_SCRIPT" "abc/" "abc" "abc"
+  run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc/" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the org arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "/abc" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the org arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc/def" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the org arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc/def/ghi" "abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the org arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "/abc/" "abc"
   [ "$status" -eq 1 ]
 }
@@ -65,29 +35,14 @@ FIX_PERMISSIONS_SCRIPT="/src/packer/linux/conf/buildkite-agent/scripts/fix-build
 @test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "/"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "abc/"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "/abc"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "abc/def"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "abc/def/ghi"
   [ "$status" -eq 1 ]
-}
-
-@test "Slashes in the pipeline arg cause an exit 1" {
   run "$FIX_PERMISSIONS_SCRIPT" "abc" "abc" "/abc/"
   [ "$status" -eq 1 ]
 }
diff --git a/unit-tests/parse-byte-units.bats b/unit-tests/parse-byte-units.bats
new file mode 100644
index 000000000..550bc1cb9
--- /dev/null
+++ b/unit-tests/parse-byte-units.bats
@@ -0,0 +1,94 @@
+#!/usr/bin/env bats
+
+PARSE_BYTE_UNITS_SCRIPT="/src/packer/linux/conf/bin/bk-parse-byte-units.sh"
+
+@test "parse with invalid input" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "llamas"
+  [ "$status" -eq 1 ]
+}
+
+@test "parse with no input" {
+  run "$PARSE_BYTE_UNITS_SCRIPT"
+  [ "$status" -eq 1 ]
+}
+
+@test "parse without unit" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45"
+  [ "$status" -eq 0 ]
+  [ "$output" = "45" ]
+}
+
+@test "parse with bytes" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45b"
+  [ "$status" -eq 0 ]
+  [ "$output" = "45" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45B"
+  [ "$status" -eq 0 ]
+  [ "$output" = "45" ]
+}
+
+@test "parse with kilobytes" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45kb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "46080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45KB"
+  [ "$status" -eq 0 ]
+  [ "$output" = "46080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45Kb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "46080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45K"
+  [ "$status" -eq 0 ]
+  [ "$output" = "46080" ]
+}
+
+@test "parse with megabytes" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45mb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "47185920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45MB"
+  [ "$status" -eq 0 ]
+  [ "$output" = "47185920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45Mb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "47185920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45M"
+  [ "$status" -eq 0 ]
+  [ "$output" = "47185920" ]
+}
+
+@test "parse with gigabytes" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45gb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "48318382080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45GB"
+  [ "$status" -eq 0 ]
+  [ "$output" = "48318382080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45Gb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "48318382080" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45G"
+  [ "$status" -eq 0 ]
+  [ "$output" = "48318382080" ]
+}
+
+@test "parse with terabytes" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45tb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "49478023249920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45TB"
+  [ "$status" -eq 0 ]
+  [ "$output" = "49478023249920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45Tb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "49478023249920" ]
+  run "$PARSE_BYTE_UNITS_SCRIPT" "45T"
+  [ "$status" -eq 0 ]
+  [ "$output" = "49478023249920" ]
+}
+
+@test "parse with decimals" {
+  run "$PARSE_BYTE_UNITS_SCRIPT" "1.5gb"
+  [ "$status" -eq 0 ]
+  [ "$output" = "1610612736" ]
+}