Skip to content

Commit

Permalink
Merge pull request #105 from Ontotext-AD/GDB-11403-Add-DNS-waiting-co…
Browse files Browse the repository at this point in the history
…ndition

Added DNS Wait condition and fixed Resource Lock
  • Loading branch information
simonzhekoff authored Jan 7, 2025
2 parents a2d5d50 + 2352819 commit 5a4c187
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 21 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,25 @@ jobs:
name: Run Terraform validate check
run: terraform validate

- id: install_latest_trivy_version
uses: aquasecurity/[email protected]
with:
cache: true
version: v0.58.1

- id: run_trivy_config
name: Run Trivy vulnerability scanner
# aquasecurity/[email protected]
uses: aquasecurity/trivy-action@91713af97dc80187565512baba96e4364e983601
uses: aquasecurity/[email protected]
with:
scan-type: config
trivy-config: trivy.yaml
ignore-unfixed: true
hide-progress: false
format: sarif
output: trivy.sarif
# TODO Remove this when fix is available for the error regarding the
# unknown state of some of the resources during plan phase
continue-on-error: true # Allows the job to continue even if this step fails

- id: run_sarif_upload
name: Upload Trivy SARIF results
Expand Down
55 changes: 36 additions & 19 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,46 @@ locals {
}, var.tags)
admin_security_principle_id = var.admin_security_principle_id != null ? var.admin_security_principle_id : data.azurerm_client_config.current.object_id

static_keys = {
vnet = "virtual_network"
application_gateway_subnet = "gateway_subnet"
subnets = "vmss_subnet"
backup = "backup_storage"
monitoring = "monitoring_workspace"
appconfig = "app_configuration"
application_gateway = "application_gateway"
vault = "key_vault"
vmss = "vmss"
}

resources_to_lock = {
"vnet" = azurerm_virtual_network.graphdb[0].id,
"application_gateway_subnet" = var.disable_agw ? null : azurerm_subnet.graphdb_gateway.id,
"subnets" = azurerm_subnet.graphdb_vmss.id,
"backup" = module.backup.storage_account_id,
"monitoring" = var.deploy_monitoring ? module.monitoring[0].la_workspace_id : null,
"appconfig" = module.appconfig.app_configuration_id,
"application_gateway" = var.disable_agw ? null : module.application_gateway[0].gateway_id,
"vnet" = azurerm_virtual_network.graphdb[0].id
"application_gateway_subnet" = azurerm_subnet.graphdb_gateway.id
"subnets" = azurerm_subnet.graphdb_vmss.id
"backup" = module.backup.storage_account_id
"monitoring" = var.deploy_monitoring ? module.monitoring[0].la_workspace_id : null
"appconfig" = module.appconfig.app_configuration_id
"application_gateway" = var.disable_agw ? null : module.application_gateway[0].gateway_id
"vault" = module.vault[0].key_vault_id
"vmss" = module.graphdb.vmss_resource_id
}

resources_to_lock_filtered = {
for key, value in local.static_keys :
key => try(local.resources_to_lock[key], null)
if(key != "monitoring" || var.deploy_monitoring) &&
(key != "application_gateway" && key != "application_gateway_subnet" || !var.disable_agw) &&
value != null && value != ""
}
}

# Management Lock for Resources
# Creates one management lock per entry of local.resources_to_lock_filtered.
# Locking is opt-in: when var.lock_resources is false, for_each receives an
# empty map and no lock is created.
resource "azurerm_management_lock" "graphdb_rg_lock" {
# Map of logical resource key => value from local.resources_to_lock.
for_each = var.lock_resources ? local.resources_to_lock_filtered : {}

# The map key (e.g. "vnet", "backup", "vmss") is used directly as the lock name.
name = each.key
# The map value is the target the lock is applied to.
scope = each.value
# Lock level as named: guards the scoped resource against deletion.
lock_level = "CanNotDelete"
}

resource "azurerm_resource_group" "graphdb" {
Expand All @@ -49,18 +78,6 @@ locals {
resource_group_name = var.resource_group_name != null ? data.azurerm_resource_group.existing_graphdb_rg[0].name : azurerm_resource_group.graphdb[0].name
}

# Management locks for the entries of local.resources_to_lock.
# Locks are only created when var.lock_resources is true; otherwise for_each
# receives an empty map.
resource "azurerm_management_lock" "graphdb_rg_lock" {
# Drop entries whose value is null or an empty string so that optional
# resources do not produce a lock.
for_each = var.lock_resources ? {
for key, value in local.resources_to_lock : key => value
if value != null && value != ""
} : {}

# Lock name is built from the resource name prefix and the map key.
name = "${var.resource_name_prefix}-lock-${each.key}"
scope = each.value
lock_level = "CanNotDelete"
# NOTE(review): the note text mentions the resource group, but scope is the
# individual map value taken from local.resources_to_lock - confirm wording.
notes = "Prevents from deleting the resource group"
}

resource "azurerm_virtual_network" "graphdb" {
count = var.virtual_network_name == null ? 1 : 0

Expand Down
66 changes: 66 additions & 0 deletions modules/graphdb/templates/00_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,72 @@ log_with_timestamp() {
echo "$(date '+%Y-%m-%d %H:%M:%S'): $1"
}

#######################################
# Blocks until the VM scale set reports at least its desired number of
# running instances and has no instance in the "Deleting" provisioning state.
# Arguments:
#   $1 - VMSS name
#   $2 - resource group containing the VMSS
#   $3 - (optional) seconds to sleep between attempts, default 10
#   $4 - (optional) maximum number of retries, default 65
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 once the scale set is in the desired state. Exits the calling script
#   with status 1 when the capacity cannot be read or the retries are
#   exhausted.
#######################################
wait_for_vmss_nodes() {
  local VMSS_NAME="$1"
  local RESOURCE_GROUP="$2"
  local RETRY_DELAY="${3:-10}"
  local MAX_RETRIES="${4:-65}"
  local RETRY_COUNT=0
  # Declared local so the counters do not leak into the caller's scope.
  local NODE_COUNT RUNNING_NODE_COUNT DELETING_NODE_COUNT

  # Get the desired capacity of the VMSS
  NODE_COUNT=$(az vmss show \
    --name "$VMSS_NAME" \
    --resource-group "$RESOURCE_GROUP" \
    --query "sku.capacity" \
    --output tsv)

  # The capacity must be a non-negative integer; an empty value (for example
  # after a failed az call) is rejected here as well.
  if [[ "$NODE_COUNT" =~ ^[0-9]+$ ]]; then
    echo "Node count is valid: $NODE_COUNT"
  else
    echo "Invalid node count: $NODE_COUNT"
    exit 1
  fi

  echo "Checking VMSS node count for $VMSS_NAME with desired node count: $NODE_COUNT"

  while true; do
    # Count instances whose instance view reports PowerState/running
    RUNNING_NODE_COUNT=$(az vmss list-instances \
      --resource-group "$RESOURCE_GROUP" \
      --name "$VMSS_NAME" \
      --expand instanceView \
      --query "[?instanceView.statuses[?code=='PowerState/running']].instanceId" \
      --output tsv | wc -l)

    # Count instances that are still being deleted
    DELETING_NODE_COUNT=$(az vmss list-instances \
      --resource-group "$RESOURCE_GROUP" \
      --name "$VMSS_NAME" \
      --query "[?provisioningState=='Deleting'].instanceId" \
      --output tsv | wc -l)

    echo "Running: $RUNNING_NODE_COUNT, Deleting: $DELETING_NODE_COUNT, Desired: $NODE_COUNT"

    # Success is evaluated before the retry limit so that a scale set which
    # reaches the desired state on the final attempt is not reported as failed.
    if [[ "$RUNNING_NODE_COUNT" -ge "$NODE_COUNT" ]] && [[ "$DELETING_NODE_COUNT" -eq 0 ]]; then
      echo "Conditions met: Running instances >= $NODE_COUNT, no Deleting instances. Proceeding..."
      break
    fi

    if [[ "$RETRY_COUNT" -ge "$MAX_RETRIES" ]]; then
      echo "Error: VMSS did not reach the desired state (Running: $RUNNING_NODE_COUNT, Deleting: $DELETING_NODE_COUNT, Desired: $NODE_COUNT) after $MAX_RETRIES retries. Exiting..."
      exit 1
    fi

    echo "Conditions not met. Waiting... (Running: $RUNNING_NODE_COUNT, Deleting: $DELETING_NODE_COUNT)"
    sleep "$RETRY_DELAY"
    RETRY_COUNT=$((RETRY_COUNT + 1))
  done
}

check_gdb() {
if [ -z "$1" ]; then
log_with_timestamp "Error: IP address or hostname is not provided."
Expand Down
11 changes: 11 additions & 0 deletions modules/graphdb/templates/01_wait_resources.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ PRIVATE_DNS_ZONE_LINK_ID="${private_dns_zone_link_id}"
APP_CONFIGURATION_ENDPOINT="${app_configuration_endpoint}"
APP_CONFIGURATION_ID="${app_configuration_id}"
STORAGE_ACCOUNT_NAME=${storage_account_name}
# Values rendered by Terraform's templatefile(). They are quoted so that
# characters allowed in Azure names (for example parentheses in a resource
# group name) cannot break the rendered script.
GRAPHDB_NODE_COUNT="${node_count}"
VMSS_NAME="${vmss_name}"
RESOURCE_GROUP="${resource_group}"

# Only wait for the scale set nodes when more than one GraphDB node is configured
if [ "$GRAPHDB_NODE_COUNT" -gt 1 ]; then
  echo "GraphDB node count is greater than 1. Running wait_for_vmss_nodes..."
  wait_for_vmss_nodes "$VMSS_NAME" "$RESOURCE_GROUP"
else
  echo "GraphDB node count is not greater than 1. Skipping wait_for_vmss_nodes."
fi

waitForAppConfigKey() {
local config_key="$1"
Expand Down
3 changes: 3 additions & 0 deletions modules/graphdb/user_data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ data "cloudinit_config" "entrypoint" {
app_configuration_endpoint : var.app_configuration_endpoint
app_configuration_id : var.app_configuration_id
storage_account_name : var.backup_storage_account_name
node_count : var.node_count
vmss_name : "vmss-${var.resource_name_prefix}"
resource_group : var.resource_group_name
})
}

Expand Down

0 comments on commit 5a4c187

Please sign in to comment.