Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubevirt: Run Descheduler on node boot to rebalance cluster apps #4506

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/kube/cluster-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,7 @@ fi
check_and_remove_excessive_k3s_logs
check_and_run_vnc
Update_CheckClusterComponents
Update_RunDeschedulerOnBoot
wait_for_item "wait"
sleep 15
done
50 changes: 50 additions & 0 deletions pkg/kube/cluster-update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ trigger_k3s_selfextraction() {
/usr/bin/k3s check-config >> "$INSTALL_LOG" 2>&1
}

# Pull in the descheduler helper functions.
# NOTE(review): descheduler_install, called by Update_RunDeschedulerOnBoot, is
# presumably defined in this sourced file - confirm.
# shellcheck source=pkg/kube/descheduler-utils.sh
. /usr/bin/descheduler-utils.sh

# JSON status file; Update_RunDeschedulerOnBoot reads its .DeviceName field to
# derive the kubernetes node name of this device.
EdgeNodeInfoPath="/persist/status/zedagent/EdgeNodeInfo/global.json"
# Path of the component update tool (its callers are outside this view).
COMP_UPDATE_PATH="/usr/bin/update-component"

# link_multus_into_k3s creates a symlink to the multus CNI binary inside the
# k3s "current" data bin directory.
link_multus_into_k3s() {
    ln -s \
        /var/lib/cni/bin/multus \
        /var/lib/rancher/k3s/data/current/bin/multus
}
Expand Down Expand Up @@ -141,6 +147,49 @@ Update_CheckClusterComponents() {
wait_for_item "update_cluster_post"
}

# Update_RunDeschedulerOnBoot will run the descheduler to evict pods from the edge node
# on boot. This is to allow rebalancing apps via re-scheduling them with an aim to meet
# affinity as specified in the pod config.
#
# Globals:
#   EdgeNodeInfoPath (read) - JSON file whose .DeviceName field names this node
# Files:
#   /tmp/descheduler-ran-onboot - marker file; while it exists this function
#                                 is a no-op
#   /etc/descheduler-job.yaml   - manifest applied to start the descheduler job
# Behavior:
#   Every unmet precondition results in a silent early return, so the function
#   is safe to call repeatedly; it acts on the first call where all
#   preconditions hold and the job is applied successfully.
Update_RunDeschedulerOnBoot() {
    # Currently only run once per boot
    if [ -f /tmp/descheduler-ran-onboot ]; then
        return
    fi

    # Quoted so that an empty/unset path fails the -f test instead of
    # collapsing to `[ ! -f ]`, which would skip this guard.
    if [ ! -f "$EdgeNodeInfoPath" ]; then
        return
    fi
    # is api ready
    if ! update_isClusterReady; then
        return
    fi
    # Don't run unless it has been installed
    # (descheduler_install is defined outside this function)
    if ! descheduler_install; then
        return
    fi
    # node ready and allowing scheduling
    # The node name is the device name, lower-cased.
    node=$(jq -r '.DeviceName' < "$EdgeNodeInfoPath" | tr -d '\n' | tr '[:upper:]' '[:lower:]')
    # Count lines reporting "Ready" that are not cordoned (SchedulingDisabled).
    node_count_ready=$(kubectl get "node/${node}" | grep -v SchedulingDisabled | grep -cw Ready )
    if [ "$node_count_ready" -ne 1 ]; then
        return
    fi
    # Ensure all infrastructure pods are online on node
    # Each daemonset yields "true" when numberReady equals
    # desiredNumberScheduled; the results are concatenated without newlines.
    # NOTE(review): the comparison expects exactly three longhorn daemonsets.
    lhStatus=$(kubectl -n longhorn-system get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$lhStatus" != "truetruetrue" ]; then
        return
    fi
    # NOTE(review): the comparison expects exactly one kubevirt daemonset.
    kvStatus=$(kubectl -n kubevirt get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$kvStatus" != "true" ]; then
        return
    fi
    # Job lives persistently in cluster, cleanup after old runs
    if kubectl -n kube-system get job/descheduler-job; then
        kubectl -n kube-system delete job/descheduler-job
    fi
    # Only record the run once the job was actually submitted; otherwise a
    # transient apply failure would suppress every retry for this boot.
    if kubectl apply -f /etc/descheduler-job.yaml; then
        touch /tmp/descheduler-ran-onboot
    fi
}

update_isClusterReady() {
if ! kubectl cluster-info; then
return 1
Expand All @@ -152,6 +201,7 @@ update_isClusterReady() {
return 0
}


#
# Handle kube component updates
#
Expand Down
Loading