Skip to content

Commit

Permalink
Added wait condition until the node count equals the desired capacity.
Browse files Browse the repository at this point in the history
  • Loading branch information
simonzhekoff committed Dec 10, 2024
1 parent 5380ab0 commit 2948e73
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 3 deletions.
6 changes: 6 additions & 0 deletions modules/graphdb/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ resource "aws_autoscaling_group" "graphdb_auto_scaling_group" {

target_group_arns = var.graphdb_target_group_arns

# Instance maintenance policy for the Auto Scaling Group. Both healthy-percentage
# bounds are supplied through the module's
# instance_maintenance_policy_min_healthy_percentage and
# instance_maintenance_policy_max_healthy_percentage input variables.
instance_maintenance_policy {
min_healthy_percentage = var.instance_maintenance_policy_min_healthy_percentage
max_healthy_percentage = var.instance_maintenance_policy_max_healthy_percentage
}

launch_template {
id = aws_launch_template.graphdb.id
version = aws_launch_template.graphdb.latest_version
Expand Down Expand Up @@ -119,3 +124,4 @@ resource "aws_autoscaling_group" "graphdb_auto_scaling_group" {
}
}
}

66 changes: 63 additions & 3 deletions modules/graphdb/templates/00_functions.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,72 @@
#!/usr/bin/env bash

# Generic helper functions

# Writes a single log line to stdout, prefixed with the current local time.
# Arguments: $1 - the message text
# Outputs:   "YYYY-MM-DD HH:MM:SS: <message>" followed by a newline
log_with_timestamp() {
  printf '%s: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

#######################################
# Waits until an Auto Scaling Group has settled: the number of InService
# instances is at most the desired capacity, no instance of the group is in the
# Terminating lifecycle state, and no EC2 instance belonging to the group is in
# the shutting-down state.
#
# Arguments:
#   $1 - name of the Auto Scaling Group
#   $2 - (optional) seconds to sleep between polls, default 10
#   $3 - (optional) maximum number of retries before giving up, default 65
# Outputs:
#   Progress messages via log_with_timestamp.
# Returns:
#   0 once the conditions are met. Exits the shell with status 1 when the
#   desired capacity cannot be read or the retry limit is reached.
#######################################
wait_for_asg_nodes() {
  local ASG_NAME="$1"
  local RETRY_DELAY=10
  local MAX_RETRIES=65
  local RETRY_COUNT=0
  local COUNT_REGEX='^[0-9]+$'

  # Optional overrides; plain positional checks keep the defaults when omitted.
  if [[ $# -ge 2 && -n "$2" ]]; then
    RETRY_DELAY="$2"
  fi
  if [[ $# -ge 3 && -n "$3" ]]; then
    MAX_RETRIES="$3"
  fi

  # Get the desired capacity of the ASG
  local NODE_COUNT
  NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
    --auto-scaling-group-names "$ASG_NAME" \
    --query "AutoScalingGroups[0].DesiredCapacity" \
    --output text)

  # Anything that is not a plain non-negative integer (empty output, "None",
  # an error message) means the capacity could not be determined.
  if ! [[ "$NODE_COUNT" =~ $COUNT_REGEX ]]; then
    log_with_timestamp "Error: Unable to retrieve valid Desired Capacity for ASG: $ASG_NAME. Received value: $NODE_COUNT."
    exit 1
  fi

  log_with_timestamp "Checking ASG node count for $ASG_NAME with desired node count: $NODE_COUNT"

  local IN_SERVICE_NODE_COUNT
  local TERMINATING_NODE_COUNT
  local SHUTTING_DOWN_NODE_COUNT
  local COUNTS_VALID
  local VALUE

  while true; do
    # Check InService and Terminating states via ASG
    IN_SERVICE_NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
      --auto-scaling-group-names "$ASG_NAME" \
      --query "AutoScalingGroups[0].Instances[?LifecycleState=='InService'] | length(@)" \
      --output text)

    TERMINATING_NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
      --auto-scaling-group-names "$ASG_NAME" \
      --query "AutoScalingGroups[0].Instances[?LifecycleState=='Terminating'] | length(@)" \
      --output text)

    # Only count instances launched by this ASG. Without the tag filter every
    # shutting-down instance in the account/region would block the wait.
    SHUTTING_DOWN_NODE_COUNT=$(aws ec2 describe-instances \
      --filters "Name=instance-state-name,Values=shutting-down" \
      "Name=tag:aws:autoscaling:groupName,Values=$ASG_NAME" \
      --query "Reservations[].Instances[].InstanceId | length(@)" \
      --output text)

    log_with_timestamp "InService: $IN_SERVICE_NODE_COUNT, Terminating: $TERMINATING_NODE_COUNT, Shutting-down: $SHUTTING_DOWN_NODE_COUNT, Desired: $NODE_COUNT"

    # A failed or malformed CLI response must not be mistaken for "0 instances":
    # inside [[ ]] an empty or non-numeric operand of -eq/-le evaluates to 0.
    COUNTS_VALID=true
    for VALUE in "$IN_SERVICE_NODE_COUNT" "$TERMINATING_NODE_COUNT" "$SHUTTING_DOWN_NODE_COUNT"; do
      if ! [[ "$VALUE" =~ $COUNT_REGEX ]]; then
        COUNTS_VALID=false
      fi
    done

    if [[ "$COUNTS_VALID" != true ]]; then
      log_with_timestamp "Warning: Received a non-numeric instance count from the AWS CLI. Treating this check as failed."
    elif [[ "$IN_SERVICE_NODE_COUNT" -le "$NODE_COUNT" ]] \
      && [[ "$TERMINATING_NODE_COUNT" -eq 0 ]] \
      && [[ "$SHUTTING_DOWN_NODE_COUNT" -eq 0 ]]; then
      log_with_timestamp "Conditions met: InService <= $NODE_COUNT, no Terminating, no Shutting-down. Proceeding..."
      return 0
    fi

    if [[ "$RETRY_COUNT" -ge "$MAX_RETRIES" ]]; then
      log_with_timestamp "Error: Maximum retry attempts reached. Exiting..."
      exit 1
    fi

    log_with_timestamp "Conditions not met. Waiting... (InService: $IN_SERVICE_NODE_COUNT, Terminating: $TERMINATING_NODE_COUNT, Shutting-down: $SHUTTING_DOWN_NODE_COUNT)"
    sleep "$RETRY_DELAY"
    RETRY_COUNT=$((RETRY_COUNT + 1))
  done
}

# Function which waits for all DNS records to be created
wait_dns_records() {
local ZONE_ID="$1"
Expand Down
9 changes: 9 additions & 0 deletions modules/graphdb/templates/01_wait_node_count.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ echo "#####################################################"
# Request a session token from the instance metadata service (token TTL: 6000 seconds).
IMDS_TOKEN=$(curl -Ss -H "X-aws-ec2-metadata-token-ttl-seconds: 6000" -XPUT 169.254.169.254/latest/api/token)
# Read this instance's availability zone from the metadata service using that token.
AZ=$(curl -Ss -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" 169.254.169.254/latest/meta-data/placement/availability-zone)
# The name and node_count values below are template variables substituted when
# this file is rendered, before the shell ever runs it.
ASG_NAME=${name}
GRAPHDB_NODE_COUNT=${node_count}

# Only run the wait_for_asg_nodes function if the GraphDB node count is more than 1.
# A single-node deployment skips the ASG wait entirely.
if [ "$GRAPHDB_NODE_COUNT" -gt 1 ]; then
echo "GraphDB node count is greater than 1. Running wait_asg_nodes..."
wait_for_asg_nodes "$ASG_NAME"
else
echo "GraphDB node count is 1 or less. Skipping wait_asg_nodes."
fi

instance_refresh_status=$(aws autoscaling describe-instance-refreshes --auto-scaling-group-name "$ASG_NAME" --query 'InstanceRefreshes[?Status==`InProgress`]' --output json)

Expand Down
1 change: 1 addition & 0 deletions modules/graphdb/user_data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ data "cloudinit_config" "graphdb_user_data" {
content_type = "text/x-shellscript"
content = templatefile("${path.module}/templates/01_wait_node_count.sh.tpl", {
name : var.resource_name_prefix
node_count : var.graphdb_node_count
})
}

Expand Down
12 changes: 12 additions & 0 deletions modules/graphdb/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,15 @@ variable "ebs_default_kms_key" {
description = "Define default KMS key"
type = string
}

# Lower healthy-percentage bound, consumed by min_healthy_percentage in the
# Auto Scaling Group's instance_maintenance_policy block. Defaults to 66.
variable "instance_maintenance_policy_min_healthy_percentage" {
description = "Define minimum healthy percentage for the Instance Maintenance Policy"
type = number
default = 66
}

# Upper healthy-percentage bound, consumed by max_healthy_percentage in the
# Auto Scaling Group's instance_maintenance_policy block. Defaults to 100.
variable "instance_maintenance_policy_max_healthy_percentage" {
description = "Define maximum healthy percentage for the Instance Maintenance Policy"
type = number
default = 100
}

0 comments on commit 2948e73

Please sign in to comment.