From 9272ef0b4fb7b58195add0b543daa2645dee50ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Skocze=C5=84?= Date: Wed, 16 Apr 2025 15:15:07 +0000 Subject: [PATCH] KEP-5229: Asynchronous API calls during scheduling --- keps/prod-readiness/sig-scheduling/5229.yaml | 3 + .../README.md | 961 ++++++++++++++++++ .../kep.yaml | 28 + 3 files changed, 992 insertions(+) create mode 100644 keps/prod-readiness/sig-scheduling/5229.yaml create mode 100644 keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/README.md create mode 100644 keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/kep.yaml diff --git a/keps/prod-readiness/sig-scheduling/5229.yaml b/keps/prod-readiness/sig-scheduling/5229.yaml new file mode 100644 index 00000000000..5e828592a0b --- /dev/null +++ b/keps/prod-readiness/sig-scheduling/5229.yaml @@ -0,0 +1,3 @@ +kep-number: 5229 +alpha: + approver: "" diff --git a/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/README.md b/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/README.md new file mode 100644 index 00000000000..987776fce7d --- /dev/null +++ b/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/README.md @@ -0,0 +1,961 @@ +# KEP-5229: Asynchronous API calls during scheduling + + + + +- [Release Signoff Checklist](#release-signoff-checklist) +- [Summary](#summary) +- [Motivation](#motivation) + - [Goals](#goals) + - [Non-Goals](#non-goals) +- [Proposal](#proposal) + - [1: Where and how to handle API calls in the kube-scheduler](#1-where-and-how-to-handle-api-calls-in-the-kube-scheduler) + - [1.1: Handle API calls in the scheduling queue](#11-handle-api-calls-in-the-scheduling-queue) + - [1.2: Handle API calls in the handleSchedulingFailure](#12-handle-api-calls-in-the-handleschedulingfailure) + - [1.3: Use advanced queue and don't block the pod from being scheduled in the meantime](#13-use-advanced-queue-and-dont-block-the-pod-from-being-scheduled-in-the-meantime) + - [2: How to make the API calls asynchronous](#2-how-to-make-the-api-calls-asynchronous) + - [2.1: Just dispatch goroutines](#21-just-dispatch-goroutines) + - [2.2: Make the API calls queued](#22-make-the-api-calls-queued) + - [2.3: Send API calls through a kube-scheduler's cache](#23-send-api-calls-through-a-kube-schedulers-cache) + - [Another things worth considering](#another-things-worth-considering) + - [Notes/Constraints/Caveats (Optional)](#notesconstraintscaveats-optional) + - [Risks and Mitigations](#risks-and-mitigations) +- [Design Details](#design-details) + - [Test Plan](#test-plan) + - [Prerequisite testing updates](#prerequisite-testing-updates) + - [Unit tests](#unit-tests) + - [Integration tests](#integration-tests) + - [e2e tests](#e2e-tests) + - [Graduation Criteria](#graduation-criteria) + - [Upgrade / Downgrade Strategy](#upgrade--downgrade-strategy) + - [Version Skew Strategy](#version-skew-strategy) +- [Production Readiness Review Questionnaire](#production-readiness-review-questionnaire) + - [Feature Enablement and Rollback](#feature-enablement-and-rollback) + - [Rollout, Upgrade and Rollback Planning](#rollout-upgrade-and-rollback-planning) + - [Monitoring Requirements](#monitoring-requirements) + - [Dependencies](#dependencies) + - [Scalability](#scalability) + - [Troubleshooting](#troubleshooting) +- [Implementation History](#implementation-history) +- [Drawbacks](#drawbacks) +- [Alternatives](#alternatives) +- [Infrastructure Needed (Optional)](#infrastructure-needed-optional) + + +## Release Signoff Checklist + 
+Items marked with (R) are required *prior to targeting to a milestone / release*.
+
+- [ ] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR)
+- [ ] (R) KEP approvers have approved the KEP status as `implementable`
+- [ ] (R) Design details are appropriately documented
+- [ ] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors)
+  - [ ] e2e Tests for all Beta API Operations (endpoints)
+  - [ ] (R) Ensure GA e2e tests meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md)
+  - [ ] (R) Minimum Two Week Window for GA e2e tests to prove flake free
+- [ ] (R) Graduation criteria is in place
+  - [ ] (R) [all GA Endpoints](https://github.com/kubernetes/community/pull/1806) must be hit by [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md)
+- [ ] (R) Production readiness review completed
+- [ ] (R) Production readiness review approved
+- [ ] "Implementation History" section is up-to-date for milestone
+- [ ] User-facing documentation has been created in [kubernetes/website], for publication to [kubernetes.io]
+- [ ] Supporting documentation—e.g., additional design documents, links to mailing list discussions/SIG meetings, relevant PRs/issues, release notes
+
+
+[kubernetes.io]: https://kubernetes.io/
+[kubernetes/enhancements]: https://git.k8s.io/enhancements
+[kubernetes/kubernetes]: https://git.k8s.io/kubernetes
+[kubernetes/website]: https://git.k8s.io/website
+
+## Summary
+
+This KEP proposes making all API calls during scheduling asynchronous by introducing a new kube-scheduler-wide way of handling such calls.
+
+## Motivation
+
+Scheduling performance is crucial. One of the bottlenecks is the API calls made during the scheduling cycle.
+The binding cycle is already asynchronous, but it is still worth re-evaluating whether the current model of busy-waiting goroutines is good long-term.
+The following operations involve pod-based API calls during scheduling:
+1) Updating a Pod status in `handleSchedulingFailure` when a Pod is unschedulable.
+2) Preemption - `ClearNominatedNodeName` and pod eviction are already asynchronous with KEP-4832.
+3) Pod binding - already runs in a goroutine, but could still be considered.
+4) [Feature proposal: https://github.com/kubernetes/kubernetes/issues/130668] Updating the status of a Pod that is rejected by the `PreEnqueue` plugins in the scheduling queue.
+5) [Feature proposal] Setting `nominatedNodeName` in delayed binding scenarios.
+
+In-tree plugins' operations that involve API calls during scheduling:
+6) Volume binding.
+7) DRA ResourceClaim deallocation in `PostFilter`.
+8) DRA removing `ReservedFor` in `Unreserve`.
+9) DRA ResourceClaims binding.
+These could be considered for asynchronous handling as well, but not necessarily.
+
+Introducing one universal approach to handling API calls in the kube-scheduler would make these calls consistent and allow better control over
+the number of dispatched goroutines. Asynchronous preemption could also be migrated to this approach.
+
+### Goals
+
+- Introduce a new asynchronous way of making API calls in the kube-scheduler.
+- Replace the pod update API call with an asynchronous version.
+- Make it possible to update a pod to set the `PreEnqueue` status asynchronously.
+
+### Non-Goals
+
+
+
+## Proposal
+
+
+
+There are a few ways to make API calls asynchronous.
+They are introduced below to facilitate discussion and identify the most suitable solution.
+
+These questions have to be answered:
+1) Where and how to handle pod status updates during queueing and scheduling.
+2) How to make the API calls asynchronous.
+
+Also, races (collisions) between multiple API calls for a single pod should be mitigated by the design.
+
+### 1: Where and how to handle API calls in the kube-scheduler
+
+There are multiple possible ways to handle the API calls, especially the pod status updates.
+Other (potential) use cases should also be considered when choosing the solution.
+Three ways are presented below.
+
+#### 1.1: Handle API calls in the scheduling queue
+
+One possible approach is to send the API calls through the scheduling queue.
+This allows delaying putting the pod into `unschedulablePods` until the pod update completes.
+This prevents race conditions from parallel updates of a single pod because, during the API call,
+the pod is in-flight and thus not eligible for rescheduling.
+
+A new method could be added to the `PriorityQueue`, which would take the function to be called asynchronously.
+It should also make sure the pod is stored in `inFlightPods` to register the cluster events that happen during the asynchronous part.
+Calling `AddUnschedulableIfNotPresent` at the end ensures there won't be any race with the asynchronous pod update.
+Because the pod would need to be in `inFlightPods` during the API call, the size of `inFlightEvents` might increase,
+but as long as the API call executes quickly, there won't be a significant memory issue.
+
+An example solution could look like this:
+
+```go
+// Author: @sanposhiho
+func (p *PriorityQueue) AddUnschedulableAsync(pInfo *framework.QueuedPodInfo, fn func() error) {
+	// Make sure the Pod is in inFlightPods before starting the goroutine
+
+	go func() { // Or another way of dispatching
+		// Run fn first
+		if err := fn(); err != nil { ... }
+
+		// Push the pod back to the unschedQ after completing fn().
+		p.AddUnschedulableIfNotPresent(...)
+	}()
+}
+```
+
+This way, we could cover pod status updates during the failure handler (1) and pod status updates for `PreEnqueue` plugins (4).
+Asynchronous preemption (2) could be migrated to this approach by adding the possibility to return a function from `PostFilter` plugins in `PostFilterResult`
+and calling this function, probably in the failure handler, together with the status update.
+
+However, this method cannot be used for the `nominatedNodeName` scenario (5) because this operation also occurs when scheduling succeeds.
+Therefore, additional effort would have to be made to specifically ensure that the `nominatedNodeName` update doesn't collide with a potential status update.
+Probably, before the status update in the failure handler, the code should try to cancel the `nominatedNodeName` API call or wait until it finishes.
+After that, it should proceed with setting the unschedulable status via the API. The binding call might similarly need to wait.
+
+Another aspect to consider is how to dispatch the goroutines, as discussed in the [how to make the API calls asynchronous](#2-how-to-make-the-api-calls-asynchronous) section.
+
+Pros:
+- Allows delaying putting unschedulable pods back to the queue until the API update completes.
+- Prevents race conditions for parallel updates of a single pod by delaying the `AddUnschedulableIfNotPresent` call.
+- Can easily cover status updates for both scheduling failures and `PreEnqueue` failures.
+- Asynchronous preemption could be migrated to this approach, increasing consistency.
+
+Cons:
+- Handling of failures might not be consistent, requiring `AddUnschedulableAsync` to be called in two places.
+- Delaying the `AddUnschedulableIfNotPresent` call increases pod queuing latency because the initial backoff timestamp is set there.
+- Cannot be used for the `nominatedNodeName` scenario, requiring additional effort and separate handling.
+- Might visibly increase the size of `inFlightEvents` if API calls are slow or if there are many calls.
+
+
+#### 1.2: Handle API calls in the handleSchedulingFailure
+
+Another approach could be to make all unschedulable status update API calls within `handleSchedulingFailure`.
+This would make this handler the only error-reporting path. Synchronous API calls within this handler could be made asynchronous,
+but additional effort would be needed to prevent race conditions. This could be achieved by blocking the retries of the pod using `PreEnqueue`
+(similar to asynchronous preemption) or by implementing advanced queueing logic.
+A minimal sketch of this approach is shown at the end of this subsection.
+
+This way, again, we could cover pod status updates during the failure handler (1),
+but pod status updates for `PreEnqueue` plugins (4) will require more refactoring by either:
+- Running a simplified scheduling cycle for pods that were rejected by the `PreEnqueue` plugins to update the pod condition.
+  This might negatively impact scheduling performance because a portion of the scheduling cycles will be spent on pods that are ultimately unschedulable.
+  Moreover, `PreEnqueue` plugins might also need to be called within this simplified scheduling cycle,
+  or alternatively, `PreFilter` plugins could implement the necessary PreEnqueue logic, duplicating it.
+- Calling `handleSchedulingFailure` directly from the scheduling queue when a pod is rejected by the `PreEnqueue` plugins.
+  This might be feasible, although it would create a circular dependency between the scheduling queue and the handler;
+  however, it wouldn't have the same performance implications as the solution above.
+
+Asynchronous preemption could also be migrated to this approach by exposing a function,
+provided that the blocking behavior in `PreEnqueue` is consistent with the actual preemption blocking mechanism.
+
+Again, this method cannot be used for the `nominatedNodeName` scenario (5) because this operation also occurs when scheduling succeeds.
+Therefore, additional effort would have to be made to specifically ensure that the `nominatedNodeName` update doesn't collide with a potential status update.
+
+Pros:
+- Makes the failure handler the single path of reporting unschedulable status errors.
+- Asynchronous preemption could potentially be migrated to this approach, increasing consistency.
+- The pod would be immediately put back into the scheduling queue, starting the backoff timer right away.
+
+Cons:
+- Requires additional effort to prevent race conditions for updates.
+- Handling PreEnqueue rejections requires significant refactoring (implementing a simplified scheduling cycle or a direct `handleSchedulingFailure` call).
+  - Simplified scheduling cycle for `PreEnqueue` rejections could impact performance and duplicate `PreEnqueue` logic.
+  - Direct `handleSchedulingFailure` call would introduce a circular dependency.
+- Cannot be used for the `nominatedNodeName` scenario, requiring additional effort and separate handling.
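+
+To make the discussion more concrete, below is a minimal, self-contained sketch of this approach.
+All names (`asyncStatusUpdater`, the simplified `handleSchedulingFailure` signature, `PreEnqueueCheck`) are hypothetical and only
+illustrate the gate-then-dispatch pattern; they are not the actual kube-scheduler APIs.
+
+```go
+package main
+
+import (
+	"fmt"
+	"sync"
+	"time"
+)
+
+// asyncStatusUpdater tracks pods that have an in-flight status update API call (hypothetical type).
+type asyncStatusUpdater struct {
+	mu      sync.Mutex
+	pending map[string]bool // keyed by pod UID
+}
+
+// block records that a pod has an in-flight API call.
+func (u *asyncStatusUpdater) block(uid string) {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	u.pending[uid] = true
+}
+
+// unblock allows the pod to be retried again.
+func (u *asyncStatusUpdater) unblock(uid string) {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	delete(u.pending, uid)
+}
+
+// PreEnqueueCheck would be consulted by the scheduling queue, playing the same
+// role as the blocking used by asynchronous preemption today.
+func (u *asyncStatusUpdater) PreEnqueueCheck(uid string) bool {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	return !u.pending[uid]
+}
+
+// handleSchedulingFailure sketches the single error-reporting path: the status
+// update is dispatched asynchronously, and the pod is gated from retries only
+// until the call completes, so its backoff timer can start right away.
+func handleSchedulingFailure(u *asyncStatusUpdater, uid string, updateStatus func() error) {
+	u.block(uid)
+	go func() {
+		defer u.unblock(uid)
+		if err := updateStatus(); err != nil {
+			// How to surface asynchronous API errors is an open question in this KEP.
+			fmt.Println("async status update failed:", err)
+		}
+	}()
+	// In the real scheduler, AddUnschedulableIfNotPresent would be called here.
+}
+
+func main() {
+	u := &asyncStatusUpdater{pending: map[string]bool{}}
+	handleSchedulingFailure(u, "pod-1", func() error {
+		time.Sleep(10 * time.Millisecond) // stands in for the API call latency
+		return nil
+	})
+	fmt.Println("retry allowed right after failure:", u.PreEnqueueCheck("pod-1"))    // false
+	time.Sleep(20 * time.Millisecond)
+	fmt.Println("retry allowed once the update finished:", u.PreEnqueueCheck("pod-1")) // true
+}
+```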
+
+
+#### 1.3: Use advanced queue and don't block the pod from being scheduled in the meantime
+
+A third approach could involve allowing the pod to enter the scheduling queue and be scheduled again even before the status update API call completes, without blocking it.
+This would require implementing advanced logic for queueing API calls in the kube-scheduler and migrating **all** pod-based API calls done during scheduling to this method,
+potentially including the binding API call. The new component should be able to resolve any conflicts in the incoming API calls as well as parallelize them properly,
+e.g., not running two updates of the same pod in parallel. This requires [making the API calls queued](#22-make-the-api-calls-queued) or
+[sending API calls through a kube-scheduler's cache](#23-send-api-calls-through-a-kube-schedulers-cache) to be implemented.
+
+All pod-based scenarios (1 - 5) could and should be implemented when choosing this approach.
+Still, a single error-reporting path for pod condition updates could be considered but wouldn't be required.
+
+Pros:
+- Allows the pod to be scheduled again even before the API call completes.
+- Simplifies introducing new API calls to the kube-scheduler if the collision handling logic is configured correctly.
+
+Cons:
+- Requires implementing complex, advanced queueing logic.
+- Necessitates migrating **all** pod-based API calls to this method.
+- Implementing collision resolution (e.g., for same-pod updates) is complex.
+
+
+### 2: How to make the API calls asynchronous
+
+Another thing worth considering is how to actually make the API calls asynchronous.
+
+#### 2.1: Just dispatch goroutines
+
+With appropriate handling of races during updates, we could just dispatch goroutines with API calls.
+A potential drawback is that we won't limit the number of these goroutines and won't be able to, e.g., delay the calls.
+Limiting goroutines could still be easily achieved by using a worker pool with a limited number of goroutines and a simple queue that stores pending calls (see the sketch after the lists below).
+Some delay might still appear as a side effect, especially when there are problems with the kube-apiserver,
+so some higher-level mechanism such as (1.1) or (1.2) would be needed to prevent pod update races.
+
+Pros:
+- Simple to implement if the appropriate race handling is chosen.
+- Can easily be extended with a simple queue and worker pool to limit the number of goroutines.
+
+Cons:
+- Does not inherently support delaying calls.
+- Higher-level mechanisms (like 1.1 or 1.2) would be needed to prevent pod update races.
+- `nominatedNodeName` scenario support would require more effort in (1.1) or (1.2).
+
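+
+A minimal sketch of such a worker pool is shown below. It is purely illustrative and assumes nothing about the
+kube-scheduler's internals; the `apiCallDispatcher` name and its methods are hypothetical, and any bounded-concurrency
+primitive would do.
+
+```go
+package main
+
+import (
+	"fmt"
+	"sync"
+)
+
+// apiCallDispatcher queues pending calls and executes them with a bounded
+// number of worker goroutines.
+type apiCallDispatcher struct {
+	calls chan func() error
+	wg    sync.WaitGroup
+}
+
+func newAPICallDispatcher(workers, queueSize int) *apiCallDispatcher {
+	d := &apiCallDispatcher{calls: make(chan func() error, queueSize)}
+	for i := 0; i < workers; i++ {
+		d.wg.Add(1)
+		go func() {
+			defer d.wg.Done()
+			for call := range d.calls {
+				if err := call(); err != nil {
+					// Error handling for asynchronous calls is an open question.
+					fmt.Println("async API call failed:", err)
+				}
+			}
+		}()
+	}
+	return d
+}
+
+// Dispatch enqueues a call; it blocks only if the queue is full.
+func (d *apiCallDispatcher) Dispatch(call func() error) {
+	d.calls <- call
+}
+
+// Close stops accepting calls and waits for the in-flight ones to finish.
+func (d *apiCallDispatcher) Close() {
+	close(d.calls)
+	d.wg.Wait()
+}
+
+func main() {
+	d := newAPICallDispatcher(2 /* workers */, 100 /* queue size */)
+	for i := 0; i < 5; i++ {
+		i := i
+		d.Dispatch(func() error {
+			fmt.Println("updating pod status", i) // stands in for a real API call
+			return nil
+		})
+	}
+	d.Close()
+}
+```
+
+Such a pool bounds the number of goroutines, but it has no insight into what the calls actually do; adding that
+understanding is what the queueing approach below provides.
+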
+
+#### 2.2: Make the API calls queued
+
+To make asynchronous dispatching more advanced, a queueing approach could be explored.
+A queue might understand what the API calls are intended to do and eventually delay, skip, or merge them,
+e.g., not setting `nominatedNodeName` when pod binding is already enqueued.
+Initially, it could be a framework, which might be extended in the future, e.g., by introducing the possibility of setting delays.
+
+However, it is an open question what should happen if two update API calls for the same pod are enqueued.
+This might not happen in (1.1) and (1.2) if we wait for the previous status update call to complete or terminate it.
+Otherwise, since the update is currently done on a copy of the pod, the two might collide. If the update were to be done on the original pod object,
+it might be possible to simply decide which API calls should be applied for a pod:
+- Status update (patch): Apply the newest API call
+- Binding: Ignore status update API calls
+- Delete (in preemption): Ignore status update as well as binding API calls
+
+```go
+type APICallType string
+
+const (
+	StatusUpdate APICallType = "status_update"
+	Binding      APICallType = "binding"
+	Delete       APICallType = "delete"
+)
+
+type PodAPICall struct {
+	podID    types.UID
+	callType APICallType
+	fn       func()
+}
+
+type APIQueue struct {
+	...
+}
+
+func (aq *APIQueue) Add(podAPICall PodAPICall) {
+	// If an API call for a specific podID is already enqueued,
+	// check the callType and skip or replace the call depending on precedence.
+	...
+}
+
+func (aq *APIQueue) Run() {
+	// Dispatch a limited number of goroutines if the queue is non-empty.
+	...
+}
+```
+
+Pros:
+- Allows for advanced goroutine dispatching logic.
+- Can potentially delay, skip, or merge API calls based on type (e.g., skip `nominatedNodeName` if binding is pending).
+- All collisions could be resolved at the queue level, not relying on higher-level mechanisms (like 1.1 or 1.2).
+- Allows for (1.3), where all scenarios can be supported without additional structures.
+- Provides a framework that can be extended in the future.
+
+Cons:
+- Requires complex logic to handle potential conflicts between different update types for the same pod.
+- Needs a clear strategy for how to update the in-memory pod object during scheduling.
+
+
+#### 2.3: Send API calls through a kube-scheduler's cache
+
+A third approach could be to have a consistent pod state in the kube-scheduler itself first and then change it through the API.
+This means that all API calls would have to go through the kube-scheduler's cache, change the pod there, and only then execute.
+However, pod updates might come from outside the kube-scheduler, e.g., a user changes the spec or something changes the status (if that is even possible).
+This extended cache would have to merge the internal state of the pod with the external state,
+including the pod update made by the kube-scheduler, which will come back as an event as well.
+Currently, the pod object stored in the cache is based only on events that come to the kube-scheduler.
+
+Another thing to consider is that the cache stores only the bound pods. The rest of the pods are stored in the scheduling queue,
+so once again, API calls might need to go through the scheduling queue itself.
+
+Pros:
+- Aims for a consistent internal state of the pod within the kube-scheduler before calling the API, possibly simplifying conflict resolution.
+
+Cons:
+- Requires the cache to handle and merge updates coming from both the kube-scheduler's internal actions and external API events.
+- The cache currently only stores bound pods, requiring integration with the scheduling queue for pending pods.
+- Complex logic is needed to handle external updates arriving while an internal update is pending or in progress.
+
+
+### Another things worth considering
+
+- How to handle asynchronous API errors?
+
+
+### Notes/Constraints/Caveats (Optional)
+
+
+
+### Risks and Mitigations
+
+
+
+## Design Details
+
+
+
+### Test Plan
+
+
+
+[x] I/we understand the owners of the involved components may require updates to
+existing tests to make this code solid enough prior to committing the changes necessary
+to implement this enhancement.
+ +##### Prerequisite testing updates + + + +##### Unit tests + + + + + +- ``: `` - `` + +##### Integration tests + + + + + +- : + +##### e2e tests + + + +- : + +### Graduation Criteria + + + +### Upgrade / Downgrade Strategy + + + +### Version Skew Strategy + + + +## Production Readiness Review Questionnaire + + + +### Feature Enablement and Rollback + + + +###### How can this feature be enabled / disabled in a live cluster? + +- [x] Feature gate (also fill in values in `kep.yaml`) + - Feature gate name: SchedulerAsyncAPICalls + - Components depending on the feature gate: kube-scheduler + +###### Does enabling the feature change any default behavior? + + + +###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? + + + +###### What happens if we reenable the feature if it was previously rolled back? + +###### Are there any tests for feature enablement/disablement? + + + +### Rollout, Upgrade and Rollback Planning + + + +###### How can a rollout or rollback fail? Can it impact already running workloads? + + + +###### What specific metrics should inform a rollback? + + + +###### Were upgrade and rollback tested? Was the upgrade->downgrade->upgrade path tested? + + + +###### Is the rollout accompanied by any deprecations and/or removals of features, APIs, fields of API types, flags, etc.? + + + +### Monitoring Requirements + + + +###### How can an operator determine if the feature is in use by workloads? + + + +###### How can someone using this feature know that it is working for their instance? + + + +- [ ] Events + - Event Reason: +- [ ] API .status + - Condition name: + - Other field: +- [ ] Other (treat as last resort) + - Details: + +###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? + + + +###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? + + + +- [ ] Metrics + - Metric name: + - [Optional] Aggregation method: + - Components exposing the metric: +- [ ] Other (treat as last resort) + - Details: + +###### Are there any missing metrics that would be useful to have to improve observability of this feature? + + + +### Dependencies + + + +###### Does this feature depend on any specific services running in the cluster? + + + +### Scalability + + + +###### Will enabling / using this feature result in any new API calls? + + + +###### Will enabling / using this feature result in introducing new API types? + + + +###### Will enabling / using this feature result in any new calls to the cloud provider? + + + +###### Will enabling / using this feature result in increasing size or count of the existing API objects? + + + +###### Will enabling / using this feature result in increasing time taken by any operations covered by existing SLIs/SLOs? + + + +###### Will enabling / using this feature result in non-negligible increase of resource usage (CPU, RAM, disk, IO, ...) in any components? + + + +###### Can enabling / using this feature result in resource exhaustion of some node resources (PIDs, sockets, inodes, etc.)? + + + +### Troubleshooting + + + +###### How does this feature react if the API server and/or etcd is unavailable? + +###### What are other known failure modes? + + + +###### What steps should be taken if SLOs are not being met to determine the problem? 
+ +## Implementation History + + + +## Drawbacks + + + +## Alternatives + + + +## Infrastructure Needed (Optional) + + diff --git a/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/kep.yaml b/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/kep.yaml new file mode 100644 index 00000000000..6f3abb9c867 --- /dev/null +++ b/keps/sig-scheduling/5229-asynchronous-api-calls-during-scheduling/kep.yaml @@ -0,0 +1,28 @@ +title: Asynchronous API calls during scheduling +kep-number: 5229 +authors: + - "@macsko" +owning-sig: sig-scheduling +status: implementable +creation-date: 2025-04-08 +reviewers: + - dom4ha + - sanposhiho +approvers: + - alculquicondor + +stage: alpha + +latest-milestone: "v1.34" + +# The milestone at which this feature was, or is targeted to be, at each stage. +milestone: + alpha: "v1.34" + +# The following PRR answers are required at alpha release +# List the feature gate name and the components for which it must be enabled +feature-gates: + - name: SchedulerAsyncAPICalls + components: + - kube-scheduler +disable-supported: true