Skip to content

Commit

Permalink
Add successfulJobsHistoryLimit to UpgradeConfig spec
Browse files Browse the repository at this point in the history
Allows cleanup of older successful jobs.
I did not add the same field for failed jobs since they need to be cleaned up manually anyway.
It is not possible by design to delete the most recent successful job.
  • Loading branch information
bastjan committed Jan 28, 2025
1 parent 02ad41c commit 43ca59e
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 1 deletion.
14 changes: 14 additions & 0 deletions api/v1beta1/upgradeconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,25 @@ type UpgradeConfigSpec struct {
// +kubebuilder:validation:Format=duration
// +kubebuilder:default:="1h"
MaxUpgradeStartDelay metav1.Duration `json:"maxUpgradeStartDelay"`
// SuccessfulJobsHistoryLimit is the number of successful jobs to keep.
// A value smaller or equal to zero indicates no limit.
// It is not possible to remove the most recent job.
// Defaults to 4 if not set.
// +optional
SuccessfulJobsHistoryLimit *int `json:"successfulJobsHistoryLimit,omitempty"`

// JobTemplate defines the template for the upgrade job
JobTemplate UpgradeConfigJobTemplate `json:"jobTemplate"`
}

// GetSuccessfulJobsHistoryLimit returns the number of successful jobs to keep.
// A value smaller or equal to zero indicates no limit.
// Defaults to 4 if SuccessfulJobsHistoryLimit is not set.
func (in UpgradeConfigSpec) GetSuccessfulJobsHistoryLimit() int {
	if in.SuccessfulJobsHistoryLimit == nil {
		return 4
	}
	return *in.SuccessfulJobsHistoryLimit
}

// UpgradeConfigJobTemplate defines the desired state of UpgradeJob
type UpgradeConfigJobTemplate struct {
// Standard object's metadata of the jobs created from this template.
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions config/crd/bases/managedupgrade.appuio.io_upgradeconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,13 @@ spec:
- cron
- location
type: object
successfulJobsHistoryLimit:
description: |-
SuccessfulJobsHistoryLimit is the number of successful jobs to keep.
A value smaller or equal to zero indicates no limit.
It is not possible to remove the most recent job.
Defaults to 4 if not set.
type: integer
required:
- jobTemplate
- maxSchedulingDelay
Expand Down
40 changes: 39 additions & 1 deletion controllers/upgradeconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"math"
"slices"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -55,6 +56,8 @@ type UpgradeConfigReconciler struct {
//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradesuspensionwindows,verbs=get;list;watch
//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradesuspensionwindows/status,verbs=get

//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradejobs,verbs=get;list;watch;create;update;patch;delete

// Reconcile implements the reconcile loop for UpgradeConfig.
// It schedules UpgradeJobs based on the UpgradeConfig's schedule - if an update is available.
func (r *UpgradeConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
Expand Down Expand Up @@ -86,7 +89,8 @@ func (r *UpgradeConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques

jobSchedRequeue, schedErr := r.scheduleJob(ctx, &uc, sched, location)
statusRequeue, stErr := r.updateNextPossibleSchedulesStatus(ctx, &uc, sched, location)
if err := multierr.Combine(schedErr, stErr); err != nil {
cleanupErr := r.cleanupSuccessfulJobs(ctx, &uc)
if err := multierr.Combine(schedErr, stErr, cleanupErr); err != nil {
return ctrl.Result{}, err
}

Expand Down Expand Up @@ -195,6 +199,40 @@ findNextRun:
return 0, r.setLastScheduledUpgrade(ctx, uc, nextRun)
}

// cleanupSuccessfulJobs deletes successfully completed UpgradeJobs controlled
// by the given UpgradeConfig that exceed the configured history limit.
// Jobs are ranked by their scheduled start time (Spec.StartAfter), newest
// first, so the most recent successful jobs are always the ones retained.
// A limit smaller or equal to zero disables cleanup entirely.
func (r *UpgradeConfigReconciler) cleanupSuccessfulJobs(ctx context.Context, uc *managedupgradev1beta1.UpgradeConfig) error {
	l := log.FromContext(ctx).WithName("UpgradeConfigReconciler.cleanupSuccessfulJobs")

	controlled, err := r.getControlledJobs(ctx, uc)
	if err != nil {
		return fmt.Errorf("could not get controlled jobs: %w", err)
	}

	// Keep only the jobs whose Succeeded condition is true.
	succeeded := make([]managedupgradev1beta1.UpgradeJob, 0, len(controlled))
	for _, j := range controlled {
		if !apimeta.IsStatusConditionTrue(j.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionSucceeded) {
			continue
		}
		succeeded = append(succeeded, j)
	}

	// Sort newest first; everything past the limit is a deletion candidate.
	slices.SortFunc(succeeded, func(a, b managedupgradev1beta1.UpgradeJob) int {
		return b.Spec.StartAfter.Time.Compare(a.Spec.StartAfter.Time)
	})

	limit := uc.Spec.GetSuccessfulJobsHistoryLimit()
	if limit <= 0 || len(succeeded) <= limit {
		// No limit configured, or nothing exceeds it.
		return nil
	}

	var delErrs []error
	for i := limit; i < len(succeeded); i++ {
		job := succeeded[i]
		l.Info("deleting successful job", "job", job.Name)
		delErrs = append(delErrs, r.Delete(ctx, &job))
	}
	if err := multierr.Combine(delErrs...); err != nil {
		return fmt.Errorf("could not delete jobs: %w", err)
	}

	return nil
}

func (r *UpgradeConfigReconciler) setLastScheduledUpgrade(ctx context.Context, uc *managedupgradev1beta1.UpgradeConfig, t time.Time) error {
uc.Status.LastScheduledUpgrade = &metav1.Time{Time: t}
return r.Status().Update(ctx, uc)
Expand Down
107 changes: 107 additions & 0 deletions controllers/upgradeconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controllers

import (
"context"
"fmt"
"strconv"
"strings"
"testing"
Expand All @@ -10,10 +11,13 @@ import (
configv1 "github.com/openshift/api/config/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

managedupgradev1beta1 "github.com/appuio/openshift-upgrade-controller/api/v1beta1"
Expand Down Expand Up @@ -409,6 +413,109 @@ func requireEventMatches(t *testing.T, recorder *record.FakeRecorder, substrings
require.NotEmpty(t, matchingEvent, "no event matches %v, got %v", substrings, events)
}

// Test_UpgradeConfigReconciler_Reconcile_CleanupSuccessfulJobs verifies that a
// Reconcile pass deletes only the oldest successful UpgradeJobs that are owned
// by the UpgradeConfig, keeping the configured history limit (2 here), and
// never touches jobs that are not successful or not owned by this config.
func Test_UpgradeConfigReconciler_Reconcile_CleanupSuccessfulJobs(t *testing.T) {
	ctx := context.Background()
	// Fixed "now" so the job timestamps (and their Unix-suffixed names) are deterministic.
	clock := mockClock{now: time.Date(2022, time.April, 4, 8, 0, 0, 0, time.UTC)}

	ucv := &configv1.ClusterVersion{
		ObjectMeta: metav1.ObjectMeta{
			Name: "version",
		},
	}

	upgradeConfig := &managedupgradev1beta1.UpgradeConfig{
		ObjectMeta: metav1.ObjectMeta{
			Name:              "daily-maintenance",
			Namespace:         "appuio-openshift-upgrade-controller",
			CreationTimestamp: metav1.Time{Time: clock.Now().Add(-time.Hour)},
		},
		Spec: managedupgradev1beta1.UpgradeConfigSpec{
			Schedule: managedupgradev1beta1.UpgradeConfigSchedule{
				Cron: "0 22 * * *", // At 22:00 every day
			},
			// Keep only the two most recent successful jobs.
			SuccessfulJobsHistoryLimit: ptr.To(2),
		},
	}

	client := controllerClient(t, ucv, upgradeConfig)

	// One scheduled start time per day, going back three days from "now".
	jobStartTs := []time.Time{
		clock.Now().Add(-24 * time.Hour * 1),
		clock.Now().Add(-24 * time.Hour * 2),
		clock.Now().Add(-24 * time.Hour * 3),
	}

	// For every start time create three jobs that differ only in ownership and
	// success status, so the cleanup's filtering can be observed per dimension.
	for _, ts := range jobStartTs {
		job := &managedupgradev1beta1.UpgradeJob{
			ObjectMeta: metav1.ObjectMeta{
				Name:      fmt.Sprintf("job-%d", ts.Unix()),
				Namespace: upgradeConfig.Namespace,
			},
			Spec: managedupgradev1beta1.UpgradeJobSpec{
				StartAfter: metav1.NewTime(ts),
			},
		}
		{
			// Owned by the config AND successful: a cleanup candidate.
			successfulOwned := job.DeepCopy()
			successfulOwned.Name = "successful-owned-" + successfulOwned.Name
			require.NoError(t, controllerutil.SetControllerReference(upgradeConfig, successfulOwned, client.Scheme()))
			require.NoError(t, client.Create(ctx, successfulOwned))
			apimeta.SetStatusCondition(&successfulOwned.Status.Conditions, metav1.Condition{
				Type:   managedupgradev1beta1.UpgradeJobConditionSucceeded,
				Status: metav1.ConditionTrue,
			})
			require.NoError(t, client.Status().Update(ctx, successfulOwned))
		}
		{
			// Owned but not successful: must never be cleaned up.
			notSuccessfulOwned := job.DeepCopy()
			notSuccessfulOwned.Name = "not-successful-owned-" + notSuccessfulOwned.Name
			require.NoError(t, controllerutil.SetControllerReference(upgradeConfig, notSuccessfulOwned, client.Scheme()))
			require.NoError(t, client.Create(ctx, notSuccessfulOwned))
		}
		{
			// Successful but not owned by this config: must never be cleaned up.
			successfulNotOwned := job.DeepCopy()
			successfulNotOwned.Name = "successful-not-owned-" + successfulNotOwned.Name
			require.NoError(t, client.Create(ctx, successfulNotOwned))
			apimeta.SetStatusCondition(&successfulNotOwned.Status.Conditions, metav1.Condition{
				Type:   managedupgradev1beta1.UpgradeJobConditionSucceeded,
				Status: metav1.ConditionTrue,
			})
			require.NoError(t, client.Status().Update(ctx, successfulNotOwned))
		}
	}

	recorder := record.NewFakeRecorder(5)
	subject := &UpgradeConfigReconciler{
		Client:   client,
		Scheme:   client.Scheme(),
		Recorder: recorder,

		Clock: &clock,

		ManagedUpstreamClusterVersionName: "version",
	}

	_, err := subject.Reconcile(ctx, requestForObject(upgradeConfig))
	require.NoError(t, err)

	// Only the oldest successful-owned job (1648800000 = 3 days ago) should be
	// gone; everything else survives the cleanup.
	var nj managedupgradev1beta1.UpgradeJobList
	require.NoError(t, client.List(ctx, &nj))
	names := make([]string, 0, len(nj.Items))
	for _, j := range nj.Items {
		names = append(names, j.Name)
	}
	require.ElementsMatch(t, names, []string{
		"not-successful-owned-job-1648800000",
		"not-successful-owned-job-1648886400",
		"not-successful-owned-job-1648972800",
		"successful-not-owned-job-1648800000",
		"successful-not-owned-job-1648886400",
		"successful-not-owned-job-1648972800",
		"successful-owned-job-1648886400",
		"successful-owned-job-1648972800",
	}, "only owned and successful jobs should be cleaned up, and only the oldest ones")
}

// requireTimeEqual asserts that two times are equal, ignoring their timezones.
func requireTimeEqual(t *testing.T, expected, actual time.Time, msgAndArgs ...any) {
t.Helper()
Expand Down

0 comments on commit 43ca59e

Please sign in to comment.