Skip to content

Commit

Permalink
feat: implement observability for usage of deprecated AMIs (#7362)
Browse files Browse the repository at this point in the history
Co-authored-by: skagalwala <[email protected]>
Co-authored-by: Amanuel Engeda <[email protected]>
  • Loading branch information
3 people authored Dec 10, 2024
1 parent 42ab06c commit 29dfc5f
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,9 @@ spec:
items:
description: AMI contains resolved AMI selector values utilized for node launch
properties:
deprecated:
description: Deprecation status of the AMI
type: boolean
id:
description: ID of the AMI
type: string
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,9 @@ spec:
items:
description: AMI contains resolved AMI selector values utilized for node launch
properties:
deprecated:
description: Deprecation status of the AMI
type: boolean
id:
description: ID of the AMI
type: string
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/v1/ec2nodeclass_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const (
ConditionTypeSubnetsReady = "SubnetsReady"
ConditionTypeSecurityGroupsReady = "SecurityGroupsReady"
ConditionTypeAMIsReady = "AMIsReady"
ConditionTypeAMIsDeprecated = "AMIsDeprecated"
ConditionTypeInstanceProfileReady = "InstanceProfileReady"
)

Expand Down Expand Up @@ -54,6 +55,9 @@ type AMI struct {
// ID of the AMI
// +required
ID string `json:"id"`
// Deprecation status of the AMI
// +optional
Deprecated bool `json:"deprecated,omitempty"`
// Name of the AMI
// +optional
Name string `json:"name,omitempty"`
Expand Down
13 changes: 13 additions & 0 deletions pkg/controllers/nodeclass/status/ami.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,22 @@ func (a *AMI) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconc
return v1.AMI{
Name: ami.Name,
ID: ami.AmiID,
Deprecated: ami.Deprecated,
Requirements: reqs,
}
})

// If deprecated AMIs are discovered set the AMIsDeprecated status condition
// If no deprecated AMIs are present, and previous status condition for AMIsDeprecated exists, remove the condition
hasDeprecatedAMIs := lo.Filter(nodeClass.Status.AMIs, func(ami v1.AMI, _ int) bool {
return ami.Deprecated
})
hasDeprecatedCondition := nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated) != nil
if len(hasDeprecatedAMIs) > 0 {
nodeClass.StatusConditions().SetTrue(v1.ConditionTypeAMIsDeprecated)
} else if hasDeprecatedCondition {
_ = nodeClass.StatusConditions().Clear(v1.ConditionTypeAMIsDeprecated)
}
nodeClass.StatusConditions().SetTrue(v1.ConditionTypeAMIsReady)
return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
}
210 changes: 210 additions & 0 deletions pkg/controllers/nodeclass/status/ami_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -534,4 +534,214 @@ var _ = Describe("NodeClass AMI Status Controller", func() {
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeFalse())
})
Context("NodeClass AMI Status", func() {
BeforeEach(func() {
// Set time using the injectable/fake clock to now
awsEnv.Clock.SetTime(time.Now())
nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{
{
Tags: map[string]string{"*": "*"},
},
}
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
Images: []ec2types.Image{
{
Name: aws.String("test-ami-3"),
ImageId: aws.String("ami-id-789"),
CreationDate: aws.String("2021-08-31T00:12:42.000Z"),
DeprecationTime: aws.String(awsEnv.Clock.Now().Add(30 * time.Minute).Format(time.RFC3339)),
Architecture: "x86_64",
Tags: []ec2types.Tag{
{Key: aws.String("Name"), Value: aws.String("test-ami-3")},
{Key: aws.String("foo"), Value: aws.String("bar")},
},
},
{
Name: aws.String("test-ami-2"),
ImageId: aws.String("ami-id-456"),
CreationDate: aws.String("2021-08-31T00:12:42.000Z"),
Architecture: "arm64",
Tags: []ec2types.Tag{
{Key: aws.String("Name"), Value: aws.String("test-ami-2")},
{Key: aws.String("foo"), Value: aws.String("bar")},
},
},
},
})
})
It("should update nodeclass AMI status with correct deprecation value and conditions", func() {
ExpectApplied(ctx, env.Client, nodeClass)
ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)

Expect(len(nodeClass.Status.AMIs)).To(Equal(2))
Expect(nodeClass.Status.AMIs).To(Equal(
[]v1.AMI{
{
Name: "test-ami-2",
ID: "ami-id-456",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureArm64},
},
},
},
{
Name: "test-ami-3",
ID: "ami-id-789",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureAmd64},
},
},
},
},
))
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue())

// Increment clock to simulate status updates on deprecated AMIs
awsEnv.Clock.Step(40 * time.Minute)

// Flush Cache
awsEnv.EC2Cache.Flush()

ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
Expect(len(nodeClass.Status.AMIs)).To(Equal(2))
Expect(nodeClass.Status.AMIs).To(Equal(
[]v1.AMI{
{
Name: "test-ami-2",
ID: "ami-id-456",
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureArm64},
},
},
},
{
Name: "test-ami-3",
ID: "ami-id-789",
// Adds deprecated field to the AMI status on the NodeClass
Deprecated: true,
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureAmd64},
},
},
},
},
))
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsDeprecated)).To(BeTrue())
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue())
})
It("should remove AMIDeprecated status condition when non deprecated AMIs are discovered", func() {
// Increment clock to simulate status updates on deprecated AMIs
awsEnv.Clock.Step(40 * time.Minute)

// Initial reconcile discovers AMIs which are deprecated
ExpectApplied(ctx, env.Client, nodeClass)
ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)

Expect(len(nodeClass.Status.AMIs)).To(Equal(2))
Expect(nodeClass.Status.AMIs).To(Equal(
[]v1.AMI{
{
Name: "test-ami-2",
ID: "ami-id-456",
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureArm64},
},
},
},
{
Name: "test-ami-3",
ID: "ami-id-789",
// Adds deprecated field to the AMI status on the NodeClass
Deprecated: true,
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureAmd64},
},
},
},
},
))
// Checks if both AMIsReady and AMIsDeprecated status conditions are set
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsDeprecated)).To(BeTrue())
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue())

// rediscover AMIs again and reconcile
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
Images: []ec2types.Image{
{
Name: aws.String("test-ami-4"),
ImageId: aws.String("ami-id-123"),
CreationDate: aws.String("2021-08-31T00:12:42.000Z"),
DeprecationTime: aws.String(awsEnv.Clock.Now().Add(30 * time.Minute).Format(time.RFC3339)),
Architecture: "x86_64",
Tags: []ec2types.Tag{
{Key: aws.String("Name"), Value: aws.String("test-ami-4")},
{Key: aws.String("foo"), Value: aws.String("bar")},
},
},
{
Name: aws.String("test-ami-2"),
ImageId: aws.String("ami-id-456"),
CreationDate: aws.String("2021-08-31T00:12:42.000Z"),
Architecture: "arm64",
Tags: []ec2types.Tag{
{Key: aws.String("Name"), Value: aws.String("test-ami-2")},
{Key: aws.String("foo"), Value: aws.String("bar")},
},
},
},
})

awsEnv.EC2Cache.Flush()

ExpectApplied(ctx, env.Client, nodeClass)
ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)

Expect(len(nodeClass.Status.AMIs)).To(Equal(2))
Expect(nodeClass.Status.AMIs).To(Equal(
[]v1.AMI{
{
Name: "test-ami-4",
ID: "ami-id-123",
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureAmd64},
},
},
},
{
Name: "test-ami-2",
ID: "ami-id-456",
Requirements: []corev1.NodeSelectorRequirement{{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{karpv1.ArchitectureArm64},
},
},
},
},
))
// Since all AMIs discovered are non deprecated, the status conditions should remove AMIsDeprecated and only set AMIsReady
Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated)).To(BeNil())
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue())
})
})
})
6 changes: 6 additions & 0 deletions test/suites/ami/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ var _ = Describe("AMI", func() {
env.ExpectCreatedNodeCount("==", 1)

env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(deprecatedAMI))))

nc := EventuallyExpectAMIsToExist(nodeClass)
Expect(len(nc.Status.AMIs)).To(BeNumerically("==", 1))
Expect(nc.Status.AMIs[0].Deprecated).To(BeTrue())
ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: v1.ConditionTypeAMIsReady, Status: metav1.ConditionTrue})
ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: v1.ConditionTypeAMIsDeprecated, Status: metav1.ConditionTrue})
})
It("should prioritize launch with non-deprecated AMIs", func() {
nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023)
Expand Down
13 changes: 13 additions & 0 deletions test/suites/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,13 @@ var _ = Describe("Drift", func() {
env.ExpectCreated(dep, nodeClass, nodePool)
pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
env.ExpectCreatedNodeCount("==", 1)
env.ExpectUpdated(nodeClass)

By("validating the deprecated status condition has propagated")
Eventually(func(g Gomega) {
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated).IsTrue()).To(BeTrue())
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsTrue()).To(BeTrue())
}).Should(Succeed())

nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0]
node := env.EventuallyExpectNodeCount("==", 1)[0]
Expand All @@ -390,6 +397,12 @@ var _ = Describe("Drift", func() {
pod = env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(amdAMI))))

By("validating the deprecated status condition has been removed")
Eventually(func(g Gomega) {
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).Should(Succeed())
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated)).To(BeNil())
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsTrue()).To(BeTrue())
}).Should(Succeed())
})
It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() {
armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion()))
Expand Down
6 changes: 4 additions & 2 deletions website/content/en/preview/concepts/nodeclasses.md
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ status:

## status.amis

[`status.amis`]({{< ref "#statusamis" >}}) contains the resolved `id`, `name`, and `requirements` of either the default AMIs for the [`spec.amiFamily`]({{< ref "#specamifamily" >}}) or the AMIs selected by the [`spec.amiSelectorTerms`]({{< ref "#specamiselectorterms" >}}) if this field is specified.
[`status.amis`]({{< ref "#statusamis" >}}) contains the resolved `id`, `name`, `requirements`, and the `deprecated` status of either the default AMIs for the [`spec.amiFamily`]({{< ref "#specamifamily" >}}) or the AMIs selected by the [`spec.amiSelectorTerms`]({{< ref "#specamiselectorterms" >}}) if this field is specified. The `deprecated` status will be shown for resolved AMIs that are deprecated.

#### Examples

Expand Down Expand Up @@ -1529,6 +1529,7 @@ status:
amis:
- id: ami-01234567890123456
name: custom-ami-amd64
deprecated: true
requirements:
- key: kubernetes.io/arch
operator: In
Expand Down Expand Up @@ -1565,7 +1566,8 @@ NodeClasses have the following status conditions:
| SubnetsReady | Subnets are discovered. |
| SecurityGroupsReady | Security Groups are discovered. |
| InstanceProfileReady | Instance Profile is discovered. |
| AMIsReady | AMIs are discovered |
| AMIsReady | AMIs are discovered. |
| AMIsDeprecated | AMIs are discovered, but they are deprecated. Individual deprecated AMIs can be identified by reviewing the `status.amis`. |
| Ready | Top level condition that indicates if the nodeClass is ready. If any of the underlying conditions is `False` then this condition is set to `False` and `Message` on the condition indicates the dependency that was not resolved. |

If a NodeClass is not ready, NodePools that reference it through their `nodeClassRef` will not be considered for scheduling.

0 comments on commit 29dfc5f

Please sign in to comment.