Skip to content

[WIP] MCO-1669: add BootImageSkewEnforcement API #2357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 81 additions & 1 deletion openapi/generated_openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions openapi/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -27587,6 +27587,24 @@
}
}
},
"com.github.openshift.api.operator.v1.ClusterBootImage": {
"description": "ClusterBootImage describes the boot image of a cluster. It stores the RHCOS version of the boot image and the OCP release version which shipped with that RHCOS boot image.",
"type": "object",
"required": [
"ocpVersion"
],
"properties": {
"ocpVersion": {
"description": "ocpVersion provides a string which represents the OCP version of the boot image",
"type": "string",
"default": ""
},
"rhcosVersion": {
"description": "rhcosVersion provides a string which represents the RHCOS version of the boot image",
"type": "string"
}
}
},
"com.github.openshift.api.operator.v1.ClusterCSIDriver": {
"description": "ClusterCSIDriver object allows management and configuration of a CSI driver operator installed by default in OpenShift. Name of the object must be name of the CSI driver it operates. See CSIDriverName type for list of allowed values.\n\nCompatibility level 1: Stable within a major release for a minimum of 12 months or 3 minor releases (whichever is longer).",
"type": "object",
Expand Down Expand Up @@ -30928,6 +30946,11 @@
"forceRedeploymentReason"
],
"properties": {
"bootImageSkewEnforcement": {
"description": "bootImageSkewEnforcement allows an admin to set the behavior of the boot image skew enforcement mechanism.",
"default": {},
"$ref": "#/definitions/com.github.openshift.api.operator.v1.SkewEnforcementSelector"
},
"failedRevisionLimit": {
"description": "failedRevisionLimit is the number of failed static pod installer revisions to keep on disk and in the api -1 = unlimited, 0 or unset = 5 (default)",
"type": "integer",
Expand Down Expand Up @@ -33280,6 +33303,32 @@
}
}
},
"com.github.openshift.api.operator.v1.SkewEnforcementSelector": {
"type": "object",
"required": [
"mode"
],
"properties": {
"clusterBootImage": {
"description": "clusterBootImage describes the current boot image of the cluster. This will be used to enforce the skew limit. Only permitted when mode is set to \"Automatic\" or \"Manual\".",
"default": {},
"$ref": "#/definitions/com.github.openshift.api.operator.v1.ClusterBootImage"
},
"mode": {
"description": "mode determines the underlying behavior of skew enforcement mechanism. Valid values are Automatic, Manual and Disabled. Automatic means that the MCO will store the OCP version associated with the last boot image update in the clusterBootImage field. Manual means that the cluster admin is expected to perform manual boot image updates and store OCP version associated with the last boot image update in the clusterBootImage field. In Automatic and Manual mode, the MCO will prevent upgrades when the boot image skew exceeds the skew limit described by the release image. Disabled means that the MCO will permit upgrades when the boot image exceeds the skew limit described by the release image. This may affect the cluster's ability to scale.",
"type": "string",
"default": ""
}
},
"x-kubernetes-unions": [
{
"discriminator": "mode",
"fields-to-discriminateBy": {
"clusterBootImage": "ClusterBootImage"
}
}
]
},
"com.github.openshift.api.operator.v1.StaticIPAMAddresses": {
"description": "StaticIPAMAddresses provides IP address and Gateway for static IPAM addresses",
"type": "object",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
apiVersion: apiextensions.k8s.io/v1 # Hack because controller-gen complains if we don't have this
name: "MachineConfiguration"
crdName: machineconfigurations.operator.openshift.io
featureGates:
- BootImageSkewEnforcement
tests:
onCreate:
- name: Should be able to create a minimal MachineConfiguration
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec: {} # No spec is required for a MachineConfiguration
expected: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
logLevel: Normal
operatorLogLevel: Normal
- name: Should be able to create an manual BootImageSkewEnforcement configuration knob
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Manual
clusterBootImage:
ocpVersion: "4.18.2"
rhcosVersion: "9.6.20250523-1"
expected: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
logLevel: Normal
operatorLogLevel: Normal
bootImageSkewEnforcement:
mode: Manual
clusterBootImage:
ocpVersion: "4.18.2"
rhcosVersion: "9.6.20250523-1"
- name: Should be able to create an automatic BootImageSkewEnforcement configuration knob
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Automatic
clusterBootImage:
ocpVersion: "4.18.2"
rhcosVersion: "9.6.20250523-1"
expected: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
logLevel: Normal
operatorLogLevel: Normal
bootImageSkewEnforcement:
mode: Automatic
clusterBootImage:
ocpVersion: "4.18.2"
rhcosVersion: "9.6.20250523-1"
- name: Should be able to create a disabled BootImageSkewEnforcement configuration knob
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Disabled
expected: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
logLevel: Normal
operatorLogLevel: Normal
bootImageSkewEnforcement:
mode: Disabled
- name: Should not be able to add ClusterBootImage field if bootImageSkewEnforcement.mode is set to Disabled
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Disabled
clusterBootImage:
ocpVersion: "4.18.2"
rhcosVersion: "9.6.20250523-1"
expectedError: "clusterBootImage is required when type is Automatic or Manual, and forbidden otherwise"
- name: ClusterBootImage field should be set if bootImageSkewEnforcement.mode is set to Automatic
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Automatic
expectedError: "clusterBootImage is required when type is Automatic or Manual, and forbidden otherwise"
- name: ClusterBootImage field should be set if bootImageSkewEnforcement.mode is set to Manual
initial: |
apiVersion: operator.openshift.io/v1
kind: MachineConfiguration
spec:
bootImageSkewEnforcement:
mode: Manual
expectedError: "clusterBootImage is required when type is Automatic or Manual, and forbidden otherwise"
58 changes: 58 additions & 0 deletions operator/v1/types_machineconfiguration.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,66 @@ type MachineConfigurationSpec struct {
// +openshift:enable:FeatureGate=NodeDisruptionPolicy
// +optional
NodeDisruptionPolicy NodeDisruptionPolicyConfig `json:"nodeDisruptionPolicy"`
// bootImageSkewEnforcement allows an admin to set the behavior of the boot image skew enforcement mechanism.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does an admin care about configuring this? What does configuring this allow them to achieve?

// +openshift:enable:FeatureGate=BootImageSkewEnforcement
// +optional
BootImageSkewEnforcement SkewEnforcementSelector `json:"bootImageSkewEnforcement"`
}

// +kubebuilder:validation:XValidation:rule="has(self.mode) && (self.mode == 'Automatic' || self.mode =='Manual') ? has(self.clusterBootImage) : !has(self.clusterBootImage)",message="clusterBootImage is required when type is Automatic or Manual, and forbidden otherwise"
// +union
type SkewEnforcementSelector struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Being more explicit in the type name helps eliminate potential for future conflicts in naming of similar features within the same package. Also makes it a bit more clear from the dev perspective that this type is explicitly used for boot image skew enforcement.

Suggested change
type SkewEnforcementSelector struct {
type BootImageSkewEnforcementSelector struct {
Suggested change
type SkewEnforcementSelector struct {
type SkewEnforcementSelector struct {

// mode determines the underlying behavior of skew enforcement mechanism.
// Valid values are Automatic, Manual and Disabled.
// Automatic means that the MCO will store the OCP version associated with the last boot image update in the
// clusterBootImage field.
// Manual means that the cluster admin is expected to perform manual boot image updates and store OCP version
// associated with the last boot image update in the clusterBootImage field.
// In Automatic and Manual mode, the MCO will prevent upgrades when the boot image skew exceeds the
// skew limit described by the release image.
// Disabled means that the MCO will permit upgrades when the boot image exceeds the skew limit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: In general we try to avoid using the term Disabled because it is often overloaded to mean different things. I don't think this usage is particularly concerning, but maybe something like None is more intuitive for a field like this?

// described by the release image. This may affect the cluster's ability to scale.
// +unionDiscriminator
// +required
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explicitly mention in the GoDoc that this field is required

Mode SkewEnforcementSelectorMode `json:"mode"`

// clusterBootImage describes the current boot image of the cluster. This will be used to enforce the skew limit.
// Only permitted when mode is set to "Automatic" or "Manual".
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Match the CEL validation error message:

Suggested change
// Only permitted when mode is set to "Automatic" or "Manual".
// Required when mode is set to "Automatic" or "Manual" and forbidden otherwise.

// +optional
ClusterBootImage ClusterBootImage `json:"clusterBootImage,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this falls into the criteria where this should be a pointer:

Suggested change
ClusterBootImage ClusterBootImage `json:"clusterBootImage,omitempty"`
ClusterBootImage *ClusterBootImage `json:"clusterBootImage,omitempty"`

I suspect you don't want something like:

clusterBootImage:
  ocpVersion: ""

to be valid

}

// ClusterBootImage describes the boot image of a cluster. It stores the RHCOS version of the boot image and
// the OCP release version which shipped with that RHCOS boot image.
type ClusterBootImage struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the relationship between ocpVersion and rhcosVersion here? What happens if I do/don't include rhcosVersion?

// ocpVersion provides a string which represents the OCP version of the boot image
// +kubebuilder:validation:XValidation:rule="self.matches('^[0-9]+\\\\.[0-9]+\\\\.[0-9]+$')",message="bootImageOCPVersion must match the OCP semver compatible format of x.y.z"
// +kubebuilder:validation:MaxLength:=8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious, why was 8 chosen?

// +required
Comment on lines +92 to +94
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explicitly mention these constraints, in plain english sentences, in the GoDoc. Users don't get to see the markers in the generated documentation so spelling them out helps make it so they are aware of any constraints.

OCPVersion string `json:"ocpVersion"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you wanting to allow the empty string "" as a valid value here? If not, you'll probably want a minimum length here to prevent the empty string being valid.

If you do, what does this being an empty string mean?


// rhcosVersion provides a string which represents the RHCOS version of the boot image
// +kubebuilder:validation:XValidation:rule="self.matches('^[0-9]+\\\\.[0-9]+\\\\.[0-9]{8}-[0-9]+$')",message="rhcosVersion must match format [major].[minor].[datestamp(YYYYMMDD)]-[buildnumber]"
// +kubebuilder:validation:MaxLength:=15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 15? is this long enough?

Looking at https://access.redhat.com/solutions/3787021 it looks like something like 416.94.202411201433-0 would be valid and is 21 characters long.

// +optional
Comment on lines +98 to +100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explicitly mention these constraints in the godoc.

RHCOSVersion string `json:"rhcosVersion,omitempty"`
}

// SkewEnforcementSelectorMode is a string enum used to indicate the cluster's boot image skew enforcement mode.
// +kubebuilder:validation:Enum:="Automatic";"Manual";"Disabled"
type SkewEnforcementSelectorMode string

const (
// Automatic represents a configuration mode that allows automatic skew enforcement.
Automatic SkewEnforcementSelectorMode = "Automatic"

// Manual represents a configuration mode that allows manual skew enforcement.
Manual SkewEnforcementSelectorMode = "Manual"

// Disabled represents a configuration mode that disables boot image skew enforcement.
Disabled SkewEnforcementSelectorMode = "Disabled"
Comment on lines +110 to +116
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prevent naming conflicts and maintain consistency with how we define constants within OpenShift by following the format {typeAlias}{name}:

Suggested change
Automatic SkewEnforcementSelectorMode = "Automatic"
// Manual represents a configuration mode that allows manual skew enforcement.
Manual SkewEnforcementSelectorMode = "Manual"
// Disabled represents a configuration mode that disables boot image skew enforcement.
Disabled SkewEnforcementSelectorMode = "Disabled"
SkewEnforcementSelectorModeAutomatic SkewEnforcementSelectorMode = "Automatic"
// Manual represents a configuration mode that allows manual skew enforcement.
SkewEnforcementSelectorModeManual SkewEnforcementSelectorMode = "Manual"
// Disabled represents a configuration mode that disables boot image skew enforcement.
SkewEnforcementSelectorModeDisabled SkewEnforcementSelectorMode = "Disabled"

)

type MachineConfigurationStatus struct {
// observedGeneration is the last generation change you've dealt with
// +optional
Expand Down
Loading