From 6777797046c1fb1f8307a7b42288d09255e903c8 Mon Sep 17 00:00:00 2001 From: AnnaZivkovic Date: Tue, 4 Feb 2025 08:57:10 -0800 Subject: [PATCH] Added preferredDuringSchedulingIgnoredDuringExecution during scheduling --- controllers/podplacement/pod_model.go | 65 +++++++++++++++++++++- controllers/podplacement/pod_model_test.go | 2 +- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/controllers/podplacement/pod_model.go b/controllers/podplacement/pod_model.go index 7f28b2b7..e22209da 100644 --- a/controllers/podplacement/pod_model.go +++ b/controllers/podplacement/pod_model.go @@ -19,6 +19,7 @@ package podplacement import ( "context" "fmt" + "github.com/openshift/multiarch-tuning-operator/pkg/informers" "strconv" "strings" "time" @@ -49,6 +50,7 @@ type Pod struct { corev1.Pod ctx context.Context recorder record.EventRecorder + ic informers.ICache } func (pod *Pod) GetPodImagePullSecrets() []string { @@ -122,13 +124,18 @@ func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) (boo pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} } - pod.setArchNodeAffinity(requirement) + if pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution == nil { + pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []corev1.PreferredSchedulingTerm{} + } + + pod.setRequiredArchNodeAffinity(requirement) + pod.setPreferredArchNodeAffinity() return true, nil } // setArchNodeAffinity sets the node affinity for the pod to the given requirement based on the rules in // the sig-scheduling's KEP-3838: https://github.com/kubernetes/enhancements/tree/master/keps/sig-scheduling/3838-pod-mutable-scheduling-directives. 
-func (pod *Pod) setArchNodeAffinity(requirement corev1.NodeSelectorRequirement) {
+func (pod *Pod) setRequiredArchNodeAffinity(requirement corev1.NodeSelectorRequirement) {
 	// the .requiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms are ORed
 	if len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 {
 		// We create a new array of NodeSelectorTerm of length 1 so that we can always iterate it in the next.
@@ -166,6 +173,60 @@ func (pod *Pod) setArchNodeAffinity(requirement corev1.NodeSelectorRequirement)
 		ArchitecturePredicateSetupMsg+fmt.Sprintf("{%s}", strings.Join(requirement.Values, ", ")))
 }
 
+// setPreferredArchNodeAffinity appends one preferred scheduling term per platform configured in the
+// NodeAffinityScoring plugin of the ClusterPodPlacementConfig. Mutating preferred terms follows sig-scheduling's
+// KEP-3838: https://github.com/kubernetes/enhancements/tree/master/keps/sig-scheduling/3838-pod-mutable-scheduling-directives.
+func (pod *Pod) setPreferredArchNodeAffinity() {
+	// Lazily fall back to the shared informers cache when none was injected.
+	if pod.ic == nil {
+		pod.ic = informers.CacheSingleton()
+	}
+
+	// Nothing to do without a ClusterPodPlacementConfig or with the scoring plugin disabled.
+	cppc := pod.ic.GetClusterPodPlacementConfig()
+	if cppc == nil || !cppc.Spec.Plugins.NodeAffinityScoring.IsEnabled() {
+		return
+	}
+	platforms := cppc.Spec.Plugins.NodeAffinityScoring.Platforms
+	if len(platforms) == 0 {
+		// No weights configured: leave the pod untouched instead of reporting an empty patch.
+		return
+	}
+
+	// The caller normally initializes Affinity and NodeAffinity; guard anyway so the method is safe alone.
+	if pod.Spec.Affinity == nil {
+		pod.Spec.Affinity = &corev1.Affinity{}
+	}
+	if pod.Spec.Affinity.NodeAffinity == nil {
+		pod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
+	}
+	nodeAffinity := pod.Spec.Affinity.NodeAffinity
+
+	// Append to the existing terms. No placeholder element is created: a zero-valued term (weight 0, empty
+	// preference) is rejected by the API server, which requires weights in the range 1-100.
+	archWeights := make(map[string]int32, len(platforms))
+	for _, platform := range platforms {
+		nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
+			nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
+			corev1.PreferredSchedulingTerm{
+				Weight: platform.Weight,
+				Preference: corev1.NodeSelectorTerm{
+					MatchExpressions: []corev1.NodeSelectorRequirement{{
+						Key:      "kubernetes.io/arch",
+						Operator: corev1.NodeSelectorOpIn,
+						Values:   []string{platform.Architecture},
+					}},
+				},
+			})
+		archWeights[platform.Architecture] = platform.Weight
+	}
+
+	// Label the pod and emit an event to record that the operator patched its node affinity.
+	pod.ensureLabel(utils.NodeAffinityLabel, utils.NodeAffinityLabelValueSet)
+	pod.publishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
+		ArchitecturePredicateSetupMsg+fmt.Sprintf("%v", archWeights))
+}
+
 func (pod *Pod) getArchitecturePredicate(pullSecretDataList [][]byte) (corev1.NodeSelectorRequirement, error) {
 	architectures, err := pod.intersectImagesArchitecture(pullSecretDataList)
 	// if an error occurs, we return an empty NodeSelectorRequirement and the error.
diff --git a/controllers/podplacement/pod_model_test.go b/controllers/podplacement/pod_model_test.go index c7d58a05..7effa52c 100644 --- a/controllers/podplacement/pod_model_test.go +++ b/controllers/podplacement/pod_model_test.go @@ -517,7 +517,7 @@ func TestPod_setArchNodeAffinity(t *testing.T) { g := NewGomegaWithT(t) pred, err := pod.getArchitecturePredicate(nil) g.Expect(err).ShouldNot(HaveOccurred()) - pod.setArchNodeAffinity(pred) + pod.setRequiredArchNodeAffinity(pred) g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity)) imageInspectionCache = mmoimage.FacadeSingleton() })