Skip to content

Commit 78b2a23

Browse files
andrewd-zededaeriknordmark
authored andcommitted
Kubevirt: Run Descheduler on node boot to rebalance cluster apps
Update_RunDeschedulerOnBoot runs the descheduler on boot to evict pods from the edge node. This allows apps to be rebalanced by re-scheduling them, with the aim of meeting the affinity specified in the pod config. The function passes through a series of gates to ensure the node is available as a scheduling destination: (1) wait for the Kubernetes API to be available; (2) wait until the node is online and uncordoned; (3) wait until the infrastructure (kubevirt/longhorn) is ready. Signed-off-by: Andrew Durbin <[email protected]>
1 parent da10da5 commit 78b2a23

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

pkg/kube/cluster-init.sh

+1
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,7 @@ fi
934934
check_and_remove_excessive_k3s_logs
935935
check_and_run_vnc
936936
Update_CheckClusterComponents
937+
Update_RunDeschedulerOnBoot
937938
wait_for_item "wait"
938939
sleep 15
939940
done

pkg/kube/cluster-update.sh

+50
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ trigger_k3s_selfextraction() {
4747
/usr/bin/k3s check-config >> "$INSTALL_LOG" 2>&1
4848
}
4949

50+
# shellcheck source=pkg/kube/descheduler-utils.sh
51+
. /usr/bin/descheduler-utils.sh
52+
53+
EdgeNodeInfoPath="/persist/status/zedagent/EdgeNodeInfo/global.json"
54+
COMP_UPDATE_PATH="/usr/bin/update-component"
55+
5056
# link_multus_into_k3s creates a symbolic link to the multus CNI binary
# inside the bin directory of the current k3s data dir.
link_multus_into_k3s() {
    ln -s \
        /var/lib/cni/bin/multus \
        /var/lib/rancher/k3s/data/current/bin/multus
}
@@ -141,6 +147,49 @@ Update_CheckClusterComponents() {
141147
wait_for_item "update_cluster_post"
142148
}
143149

150+
# Update_RunDeschedulerOnBoot will run the descheduler to evict pods from the edge node
# on boot. This is to allow rebalancing apps via re-scheduling them with an aim to meet
# affinity as specified in the pod config.
#
# The function is meant to be called repeatedly: every gate that is not yet
# satisfied simply returns so the next call can try again. The once-per-boot
# marker is only written after the descheduler job was successfully submitted.
#
# Globals: EdgeNodeInfoPath (read) - json file providing this node's DeviceName
# Outputs: creates /tmp/descheduler-ran-onboot after a successful submission
# Returns: always 0
Update_RunDeschedulerOnBoot() {
    # Currently only run once per boot
    if [ -f /tmp/descheduler-ran-onboot ]; then
        return 0
    fi

    # Quoted so that an empty/unset path fails the gate instead of skipping it
    if [ ! -f "$EdgeNodeInfoPath" ]; then
        return 0
    fi
    # is api ready
    if ! update_isClusterReady; then
        return 0
    fi
    # Don't run unless it has been installed
    if ! descheduler_install; then
        return 0
    fi
    # node ready and allowing scheduling
    node=$(jq -r '.DeviceName' < "$EdgeNodeInfoPath" | tr -d '\n' | tr '[:upper:]' '[:lower:]')
    # jq -r prints "null" when the key is missing; neither that nor an empty
    # string is a usable node name
    if [ -z "$node" ] || [ "$node" = "null" ]; then
        return 0
    fi
    node_count_ready=$(kubectl get "node/${node}" | grep -v SchedulingDisabled | grep -cw Ready)
    if [ "$node_count_ready" -ne 1 ]; then
        return 0
    fi
    # Ensure all infrastructure pods are online on node
    # One "true" per daemonset: exactly three longhorn daemonsets must exist and be fully ready
    lhStatus=$(kubectl -n longhorn-system get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$lhStatus" != "truetruetrue" ]; then
        return 0
    fi
    # Exactly one kubevirt daemonset must exist and be fully ready
    kvStatus=$(kubectl -n kubevirt get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$kvStatus" != "true" ]; then
        return 0
    fi
    # Job lives persistently in cluster, cleanup after old runs
    if kubectl -n kube-system get job/descheduler-job; then
        # If the stale job cannot be removed, applying the manifest again would
        # not start a new run; retry on the next call instead.
        if ! kubectl -n kube-system delete job/descheduler-job; then
            return 0
        fi
    fi
    # Only record the run once the job was actually submitted, otherwise a
    # transient apply failure would disable the descheduler until next boot.
    if ! kubectl apply -f /etc/descheduler-job.yaml; then
        return 0
    fi
    touch /tmp/descheduler-ran-onboot
    return 0
}
192+
144193
update_isClusterReady() {
145194
if ! kubectl cluster-info; then
146195
return 1
@@ -152,6 +201,7 @@ update_isClusterReady() {
152201
return 0
153202
}
154203

204+
155205
#
156206
# Handle kube component updates
157207
#

0 commit comments

Comments
 (0)