diff --git a/internal/controller/scheduler/pod_watcher.go b/internal/controller/scheduler/pod_watcher.go index 48859aa7..dbfbe358 100644 --- a/internal/controller/scheduler/pod_watcher.go +++ b/internal/controller/scheduler/pod_watcher.go @@ -126,8 +126,10 @@ func (w *podWatcher) OnDelete(maybePod any) { return } - jobUUID, _, err := w.jobUUIDAndLogger(pod) + log := loggerForObject(w.logger, pod) + jobUUID, err := jobUUIDForObject(pod) if err != nil { + log.Error("Job UUID label missing or invalid for pod") return } @@ -166,8 +168,10 @@ func (w *podWatcher) OnUpdate(oldMaybePod, newMaybePod any) { } func (w *podWatcher) runChecks(ctx context.Context, pod *corev1.Pod) { - jobUUID, log, err := w.jobUUIDAndLogger(pod) + log := loggerForObject(w.logger, pod) + jobUUID, err := jobUUIDForObject(pod) if err != nil { + log.Error("Job UUID label missing or invalid for pod") return } @@ -189,37 +193,6 @@ func (w *podWatcher) runChecks(ctx context.Context, pod *corev1.Pod) { w.startOrStopJobCancelChecker(ctx, log, pod, jobUUID) } -func (w *podWatcher) jobUUIDAndLogger(pod *corev1.Pod) (uuid.UUID, *zap.Logger, error) { - log := w.logger.With(zap.String("namespace", pod.Namespace), zap.String("podName", pod.Name)) - - rawJobUUID, exists := pod.Labels[config.UUIDLabel] - if !exists { - log.Debug("Job UUID label not present. Skipping.") - return uuid.UUID{}, log, errors.New("no job UUID label") - } - - jobUUID, err := uuid.Parse(rawJobUUID) - if err != nil { - log.Warn("Job UUID label was not a UUID!", zap.String("jobUUID", rawJobUUID)) - return uuid.UUID{}, log, err - } - - log = log.With(zap.String("jobUUID", jobUUID.String())) - - // Check that tags match - there may be pods around that were created by - // another controller using different tags. - if !agenttags.JobTagsMatchAgentTags(agenttags.ScanLabels(pod.Labels), w.agentTags) { - log.Debug("Pod labels do not match agent tags for this controller. Skipping.") - return uuid.UUID{}, log, errors.New("pod labels do not match agent tags for this controller") - } - - if w.isIgnored(jobUUID) { - log.Debug("Job already failed, canceled, or wasn't in a failable/cancellable state") - return jobUUID, log, errors.New("job ignored") - } - return jobUUID, log, nil -} - func (w *podWatcher) failOnImagePullFailure(ctx context.Context, log *zap.Logger, pod *corev1.Pod, jobUUID uuid.UUID) { log.Debug("Checking pod containers for ImagePullBackOff or InvalidImageName") diff --git a/internal/controller/scheduler/uuid_and_logger.go b/internal/controller/scheduler/uuid_and_logger.go new file mode 100644 index 00000000..9711b4e7 --- /dev/null +++ b/internal/controller/scheduler/uuid_and_logger.go @@ -0,0 +1,30 @@ +package scheduler + +import ( + "errors" + + "github.com/buildkite/agent-stack-k8s/v2/internal/controller/config" + "github.com/google/uuid" + "go.uber.org/zap" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// loggerForObject curries a logger with namespace, name, and job UUID taken +// from the object labels. +func loggerForObject(baseLog *zap.Logger, obj metav1.Object) *zap.Logger { + return baseLog.With( + zap.String("namespace", obj.GetNamespace()), + zap.String("name", obj.GetName()), + zap.String("jobUUID", obj.GetLabels()[config.UUIDLabel]), + ) +} + +// jobUUIDForObject parses the Buildkite job UUID from the object labels. +func jobUUIDForObject(obj metav1.Object) (uuid.UUID, error) { + rawJobUUID := obj.GetLabels()[config.UUIDLabel] + if rawJobUUID == "" { + return uuid.UUID{}, errors.New("no job UUID label") + } + + return uuid.Parse(rawJobUUID) +}