Skip to content

Commit 784aa25

Browse files
Kubevirt: Run Descheduler on node boot to rebalance cluster apps
Update_RunDeschedulerOnBoot will run the descheduler to evict pods from the edge node on boot. This allows apps to be rebalanced by re-scheduling them, with the aim of meeting the affinity specified in the pod config. This path includes a series of gates to ensure the destination node is available as a scheduling destination:
- Wait for the Kubernetes API to be available.
- Wait until the node is online and uncordoned.
- Wait until the infrastructure is ready (KubeVirt/Longhorn).
Signed-off-by: Andrew Durbin <[email protected]>
1 parent 0769a20 commit 784aa25

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

pkg/kube/cluster-init.sh

+1
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,7 @@ fi
862862
check_kubeconfig_yaml_files
863863
check_and_remove_excessive_k3s_logs
864864
check_and_run_vnc
865+
Update_RunDeschedulerOnBoot
865866
wait_for_item "wait"
866867
sleep 15
867868
done

pkg/kube/cluster-update.sh

+67
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,73 @@
33
# Copyright (c) 2024 Zededa, Inc.
44
# SPDX-License-Identifier: Apache-2.0
55

6+
# shellcheck source=pkg/kube/descheduler-utils.sh
7+
. /usr/bin/descheduler-utils.sh
8+
9+
# JSON file whose .DeviceName field Update_RunDeschedulerOnBoot reads to
# derive the name of this node.
EdgeNodeInfoPath="/persist/status/zedagent/EdgeNodeInfo/global.json"
10+
# Executable that update_Helper_APIResponding invokes with --check-api-ready.
COMP_UPDATE_PATH="/usr/bin/update-component"
11+
612
# link_multus_into_k3s creates a symbolic link to the multus CNI binary inside
# the k3s "current" bin directory. Returns the exit status of ln.
link_multus_into_k3s() {
    ln -s \
        /var/lib/cni/bin/multus \
        /var/lib/rancher/k3s/data/current/bin/multus
}
15+
16+
# Update_RunDeschedulerOnBoot will run the descheduler to evict pods from the
# edge node on boot. This is to allow rebalancing apps via re-scheduling them
# with an aim to meet affinity as specified in the pod config.
#
# The function is meant to be called repeatedly; every gate below simply
# returns so that the next call can try again. The marker file
# /tmp/descheduler-ran-onboot is only created once the job manifest has been
# applied successfully, which makes all later calls no-ops.
#
# Globals:  EdgeNodeInfoPath (read); node, node_count_ready, lhStatus,
#           kvStatus (written - kept global because the script targets
#           plain sh, where 'local' is not guaranteed).
# Calls:    update_isClusterReady, descheduler_install (not defined in this
#           block - presumably provided by descheduler-utils.sh; confirm),
#           kubectl, jq.
# Returns:  0 from every gate; otherwise the exit status of the final touch.
Update_RunDeschedulerOnBoot() {
    # Currently only run once per boot
    if [ -f /tmp/descheduler-ran-onboot ]; then
        return 0
    fi

    # Without the node info file the node name cannot be determined.
    if [ ! -f "$EdgeNodeInfoPath" ]; then
        return 0
    fi

    # is api ready
    if ! update_isClusterReady; then
        return 0
    fi

    # Don't run unless it has been installed
    if ! descheduler_install; then
        return 0
    fi

    # node ready and allowing scheduling
    # The device name is lower-cased before it is used as the node name.
    node=$(jq -r '.DeviceName' < "$EdgeNodeInfoPath" | tr -d '\n' | tr '[:upper:]' '[:lower:]')
    # jq -r prints the literal string "null" when the field is missing.
    if [ -z "$node" ] || [ "$node" = "null" ]; then
        return 0
    fi
    # Count rows whose status contains the word "Ready" and that are not
    # cordoned; -w keeps "NotReady" from matching.
    node_count_ready=$(kubectl get "node/${node}" | grep -v SchedulingDisabled | grep -cw Ready)
    if [ "$node_count_ready" -ne 1 ]; then
        return 0
    fi

    # Ensure all infrastructure pods are online on node
    # The literal comparison only passes when exactly three daemonsets are
    # listed and each has numberReady == desiredNumberScheduled.
    # NOTE(review): presumably Longhorn ships exactly three daemonsets here -
    # confirm before changing the expected count.
    lhStatus=$(kubectl -n longhorn-system get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$lhStatus" != "truetruetrue" ]; then
        return 0
    fi
    # Same check for the kubevirt namespace, where exactly one daemonset is
    # expected.
    kvStatus=$(kubectl -n kubevirt get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
    if [ "$kvStatus" != "true" ]; then
        return 0
    fi

    # Job lives persistently in cluster, cleanup after old runs
    if kubectl -n kube-system get job/descheduler-job; then
        # If the stale job cannot be removed, try again on the next call
        # instead of applying on top of it.
        if ! kubectl -n kube-system delete job/descheduler-job; then
            return 0
        fi
    fi

    # Only record the run when the job was actually submitted; otherwise a
    # transient apply failure would suppress the descheduler until reboot.
    if ! kubectl apply -f /etc/descheduler-job.yaml; then
        return 0
    fi
    touch /tmp/descheduler-ran-onboot
}
58+
59+
# update_isClusterReady succeeds (returns 0) only when both
# 'kubectl cluster-info' and update_Helper_APIResponding succeed; it returns 1
# as soon as either of them fails. The second check is not attempted when the
# first one fails.
update_isClusterReady() {
    kubectl cluster-info || return 1
    update_Helper_APIResponding || return 1
    return 0
}
69+
70+
# update_Helper_APIResponding runs the program named by COMP_UPDATE_PATH with
# the --check-api-ready flag.
# Globals:  COMP_UPDATE_PATH (read)
# Returns:  0 when that program exits successfully, 1 for any other exit
#           status (the original status is not propagated).
update_Helper_APIResponding() {
    # Quoted so that a path containing spaces or glob characters is executed
    # as a single word instead of being split/expanded by the shell.
    if "$COMP_UPDATE_PATH" --check-api-ready; then
        return 0
    fi
    return 1
}

0 commit comments

Comments
 (0)