feat: allow pod spec to be defined and patched in agent and plugin #262

19 changes: 17 additions & 2 deletions examples/config.yaml
@@ -10,5 +10,20 @@ org: my-buildkite-org
# the UUID may be found in the cluster settings
cluster-uuid: beefcafe-abbe-baba-abba-deedcedecade
tags:
- queue=my-queue
- priority=high
- queue=my-queue
- priority=high
# ssh-credentials-secret is the secret containing the SSH credentials
# configured on the agent with docker-ssh-config
# @see https://github.com/buildkite/docker-ssh-env-config
ssh-credentials-secret: buildkite-ssh-github-creds
pod-spec-patch:
serviceAccountName: 'buildkite-agent-sa'
automountServiceAccountToken: true
containers:
- name: container-0
env:
- name: GITHUB_TOKEN
valueFrom:
secretKeyRef:
name: github-secrets
key: github-token
28 changes: 17 additions & 11 deletions internal/controller/config/config.go
@@ -15,17 +15,19 @@ const (
)

type Config struct {
Debug bool `mapstructure:"debug"`
AgentTokenSecret string `mapstructure:"agent-token-secret" validate:"required"`
BuildkiteToken string `mapstructure:"buildkite-token" validate:"required"`
Image string `mapstructure:"image" validate:"required"`
JobTTL time.Duration `mapstructure:"job-ttl"`
MaxInFlight int `mapstructure:"max-in-flight" validate:"min=0"`
Namespace string `mapstructure:"namespace" validate:"required"`
Org string `mapstructure:"org" validate:"required"`
Tags stringSlice `mapstructure:"tags" validate:"min=1"`
ProfilerAddress string `mapstructure:"profiler-address" validate:"omitempty,hostname_port"`
ClusterUUID string `mapstructure:"cluster-uuid" validate:"omitempty"`
Debug bool `mapstructure:"debug"`
AgentTokenSecret string `mapstructure:"agent-token-secret" validate:"required"`
BuildkiteToken string `mapstructure:"buildkite-token" validate:"required"`
Image string `mapstructure:"image" validate:"required"`
JobTTL time.Duration `mapstructure:"job-ttl"`
MaxInFlight int `mapstructure:"max-in-flight" validate:"min=0"`
Namespace string `mapstructure:"namespace" validate:"required"`
Org string `mapstructure:"org" validate:"required"`
Tags stringSlice `mapstructure:"tags" validate:"min=1"`
ProfilerAddress string `mapstructure:"profiler-address" validate:"omitempty,hostname_port"`
ClusterUUID string `mapstructure:"cluster-uuid" validate:"omitempty"`
PodSpecPatch map[string]interface{} `mapstructure:"pod-spec-patch" validate:"omitempty"`
@triarius (Contributor) commented on Mar 15, 2024:

What happens if this is specified as something k8s would not recognise like:

llamas: true

Is it a no-op, or does the patch fail to apply and cause the Job to fail to start? If it's the latter, then I suggest we try to detect this as early as possible.

It's most likely too onerous to validate this in the JSON Schema for the Helm chart, but if some Kubernetes library exports a type for this, we should try to marshal into it so that the Helm chart fails to deploy if podSpecPatch would cause an error.

Reply (Contributor):

Oh yeah, both this and #248 will need updates to the Helm chart's JSON Schema.

SSHCredentialsSecret string `mapstructure:"ssh-credentials-secret" validate:"omitempty"`
}

type stringSlice []string
@@ -39,6 +41,7 @@ func (s stringSlice) MarshalLogArray(enc zapcore.ArrayEncoder) error {

func (c Config) MarshalLogObject(enc zapcore.ObjectEncoder) error {
enc.AddString("agent-token-secret", c.AgentTokenSecret)
enc.AddString("ssh-credentials-secret", c.SSHCredentialsSecret)
enc.AddBool("debug", c.Debug)
enc.AddString("image", c.Image)
enc.AddDuration("job-ttl", c.JobTTL)
@@ -47,5 +50,8 @@ func (c Config) MarshalLogObject(enc zapcore.ObjectEncoder) error {
enc.AddString("org", c.Org)
enc.AddString("profiler-address", c.ProfilerAddress)
enc.AddString("cluster-uuid", c.ClusterUUID)
if err := enc.AddReflected("pod-spec-patch", c.PodSpecPatch); err != nil {
return err
}
return enc.AddArray("tags", c.Tags)
}
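Note: following the review thread above, here is a minimal sketch of the early validation suggested there — strictly decoding the configured pod-spec-patch into corev1.PodSpec so that fields Kubernetes would not recognise (e.g. `llamas: true`) are rejected when the controller loads its configuration rather than when the first Job is built. The function name, the call site, and the use of encoding/json's DisallowUnknownFields are assumptions for illustration, not part of this PR.

```go
package config

import (
	"bytes"
	"encoding/json"
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// validatePodSpecPatch (hypothetical) rejects a pod-spec-patch that does not
// look like a partial corev1.PodSpec, e.g. one containing `llamas: true`.
func validatePodSpecPatch(patch map[string]interface{}) error {
	if patch == nil {
		return nil
	}

	raw, err := json.Marshal(patch)
	if err != nil {
		return fmt.Errorf("pod-spec-patch cannot be marshalled to JSON: %w", err)
	}

	// DisallowUnknownFields makes the decode fail on any key that
	// corev1.PodSpec (or its nested types) does not declare, so typos
	// surface at deploy time instead of at job creation.
	dec := json.NewDecoder(bytes.NewReader(raw))
	dec.DisallowUnknownFields()

	var spec corev1.PodSpec
	if err := dec.Decode(&spec); err != nil {
		return fmt.Errorf("pod-spec-patch is not a valid PodSpec: %w", err)
	}
	return nil
}
```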
10 changes: 6 additions & 4 deletions internal/controller/controller.go
@@ -49,10 +49,12 @@ func Run(
}

sched := scheduler.New(logger.Named("scheduler"), k8sClient, scheduler.Config{
Namespace: cfg.Namespace,
Image: cfg.Image,
AgentToken: cfg.AgentTokenSecret,
JobTTL: cfg.JobTTL,
Namespace: cfg.Namespace,
Image: cfg.Image,
AgentToken: cfg.AgentTokenSecret,
JobTTL: cfg.JobTTL,
PodSpecPatch: cfg.PodSpecPatch,
SSHCredentialsSecret: cfg.SSHCredentialsSecret,
})
limiter := scheduler.NewLimiter(logger.Named("limiter"), sched, cfg.MaxInFlight)

180 changes: 141 additions & 39 deletions internal/controller/scheduler/scheduler.go
@@ -19,6 +19,8 @@ import (
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes"
"k8s.io/utils/pointer"
)
@@ -30,10 +32,12 @@ const (
)

type Config struct {
Namespace string
Image string
AgentToken string
JobTTL time.Duration
Namespace string
Image string
AgentToken string
JobTTL time.Duration
PodSpecPatch map[string]interface{}
SSHCredentialsSecret string
}

func New(logger *zap.Logger, client kubernetes.Interface, cfg Config) *worker {
@@ -45,11 +49,13 @@ func New(logger *zap.Logger, client kubernetes.Interface, cfg Config) *worker {
}

type KubernetesPlugin struct {
PodSpec *corev1.PodSpec
GitEnvFrom []corev1.EnvFromSource
Sidecars []corev1.Container `json:"sidecars,omitempty"`
Metadata Metadata
ExtraVolumeMounts []corev1.VolumeMount
PodSpec *corev1.PodSpec `json:"podSpec,omitempty"`
PodSpecPatch map[string]interface{} `json:"podSpecPatch,omitempty"`
SSHCredentialsSecret string `json:"sshCredentialsSecret,omitempty"`
GitEnvFrom []corev1.EnvFromSource `json:"gitEnvFrom,omitempty"`
Sidecars []corev1.Container `json:"sidecars,omitempty"`
Metadata Metadata `json:"metadata,omitempty"`
ExtraVolumeMounts []corev1.VolumeMount `json:"extraVolumeMounts,omitempty"`
}

type Metadata struct {
@@ -99,6 +105,7 @@ type jobWrapper struct {
logger *zap.Logger
job *api.CommandJob
envMap map[string]string
envFrom []corev1.EnvFromSource
err error
k8sPlugin KubernetesPlugin
otherPlugins []map[string]json.RawMessage
@@ -107,10 +114,11 @@

func NewJobWrapper(logger *zap.Logger, job *api.CommandJob, config Config) *jobWrapper {
return &jobWrapper{
logger: logger,
job: job,
cfg: config,
envMap: make(map[string]string),
logger: logger,
job: job,
cfg: config,
envMap: make(map[string]string),
envFrom: make([]corev1.EnvFromSource, 0),
}
}

@@ -210,6 +218,24 @@ func (w *jobWrapper) Build() (*batchv1.Job, error) {
Value: w.job.Uuid,
},
}

// Generate env from configuration for git credentials
secretName := w.cfg.SSHCredentialsSecret
if w.k8sPlugin.SSHCredentialsSecret != "" {
secretName = w.k8sPlugin.SSHCredentialsSecret
}

if secretName != "" && len(w.k8sPlugin.GitEnvFrom) == 0 {
w.envFrom = append(w.envFrom, corev1.EnvFromSource{
SecretRef: &corev1.SecretEnvSource{
LocalObjectReference: corev1.LocalObjectReference{Name: secretName},
},
})
} else if len(w.k8sPlugin.GitEnvFrom) > 0 {
w.logger.Warn("git-env-from is deprecated, please use ssh-credentials-secret instead")
w.envFrom = append(w.envFrom, w.k8sPlugin.GitEnvFrom...)
}

if w.otherPlugins != nil {
otherPluginsJson, err := json.Marshal(w.otherPlugins)
if err != nil {
@@ -236,39 +262,47 @@ func (w *jobWrapper) Build() (*batchv1.Job, error) {
kjob.Spec.TTLSecondsAfterFinished = &ttl

podSpec := &kjob.Spec.Template.Spec

containerEnv := env
containerEnv = append(containerEnv, corev1.EnvVar{
Name: "BUILDKITE_AGENT_EXPERIMENT",
Value: "kubernetes-exec",
}, corev1.EnvVar{
Name: "BUILDKITE_BOOTSTRAP_PHASES",
Value: "plugin,command",
}, corev1.EnvVar{
Name: "BUILDKITE_AGENT_NAME",
Value: "buildkite",
}, corev1.EnvVar{
Name: "BUILDKITE_PLUGINS_PATH",
Value: "/tmp",
}, corev1.EnvVar{
Name: clicommand.RedactedVars.EnvVar,
Value: strings.Join(clicommand.RedactedVars.Value.Value(), ","),
}, corev1.EnvVar{
Name: "BUILDKITE_SHELL",
Value: "/bin/sh -ec",
}, corev1.EnvVar{
Name: "BUILDKITE_ARTIFACT_PATHS",
Value: w.envMap["BUILDKITE_ARTIFACT_PATHS"],
})

for i, c := range podSpec.Containers {
command := strings.Join(append(c.Command, c.Args...), " ")
// If the command is empty, use the command from the step
command := w.job.Command
if len(c.Command) > 0 {
command = strings.Join(append(c.Command, c.Args...), " ")
}
c.Command = []string{"/workspace/buildkite-agent"}
c.Args = []string{"bootstrap"}
c.ImagePullPolicy = corev1.PullAlways
c.Env = append(c.Env, env...)
c.Env = append(c.Env, containerEnv...)
c.Env = append(c.Env, corev1.EnvVar{
Name: "BUILDKITE_COMMAND",
Value: command,
}, corev1.EnvVar{
Name: "BUILDKITE_AGENT_EXPERIMENT",
Value: "kubernetes-exec",
}, corev1.EnvVar{
Name: "BUILDKITE_BOOTSTRAP_PHASES",
Value: "plugin,command",
}, corev1.EnvVar{
Name: "BUILDKITE_AGENT_NAME",
Value: "buildkite",
}, corev1.EnvVar{
Name: "BUILDKITE_CONTAINER_ID",
Value: strconv.Itoa(i + systemContainers),
}, corev1.EnvVar{
Name: "BUILDKITE_PLUGINS_PATH",
Value: "/tmp",
}, corev1.EnvVar{
Name: clicommand.RedactedVars.EnvVar,
Value: strings.Join(clicommand.RedactedVars.Value.Value(), ","),
}, corev1.EnvVar{
Name: "BUILDKITE_SHELL",
Value: "/bin/sh -ec",
}, corev1.EnvVar{
Name: "BUILDKITE_ARTIFACT_PATHS",
Value: w.envMap["BUILDKITE_ARTIFACT_PATHS"],
})
if c.Name == "" {
c.Name = fmt.Sprintf("%s-%d", "container", i)
@@ -277,18 +311,39 @@ func (w *jobWrapper) Build() (*batchv1.Job, error) {
c.WorkingDir = "/workspace"
}
c.VolumeMounts = append(c.VolumeMounts, volumeMounts...)
c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...)
c.EnvFrom = append(c.EnvFrom, w.envFrom...)
podSpec.Containers[i] = c
}

if len(podSpec.Containers) == 0 {
podSpec.Containers = append(podSpec.Containers, corev1.Container{
Name: "container-0",
Image: w.cfg.Image,
Command: []string{"/workspace/buildkite-agent"},
Args: []string{"bootstrap"},
WorkingDir: "/workspace",
VolumeMounts: volumeMounts,
ImagePullPolicy: corev1.PullAlways,
Env: append(containerEnv,
corev1.EnvVar{
Name: "BUILDKITE_COMMAND",
Value: w.job.Command,
}, corev1.EnvVar{
Name: "BUILDKITE_CONTAINER_ID",
Value: strconv.Itoa(0 + systemContainers),
}),
EnvFrom: w.envFrom,
})
}

containerCount := len(podSpec.Containers) + systemContainers

for i, c := range w.k8sPlugin.Sidecars {
if c.Name == "" {
c.Name = fmt.Sprintf("%s-%d", "sidecar", i)
}
c.VolumeMounts = append(c.VolumeMounts, volumeMounts...)
c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...)
c.EnvFrom = append(c.EnvFrom, w.envFrom...)
podSpec.Containers = append(podSpec.Containers, c)
}

@@ -381,9 +436,56 @@ func (w *jobWrapper) Build() (*batchv1.Job, error) {
})
podSpec.RestartPolicy = corev1.RestartPolicyNever

// Allow podSpec to be overridden by the agent configuration and the k8s plugin

// Patch from the agent is applied first
var err error
if w.cfg.PodSpecPatch != nil {
w.logger.Info("applying podSpec patch from agent")
podSpec, err = patchPodSpec(podSpec, w.cfg.PodSpecPatch)
if err != nil {
return nil, fmt.Errorf("failed to apply podSpec patch from agent: %w", err)
}
}

if w.k8sPlugin.PodSpecPatch != nil {
w.logger.Info("applying podSpec patch from k8s plugin")
podSpec, err = patchPodSpec(podSpec, w.k8sPlugin.PodSpecPatch)
if err != nil {
return nil, fmt.Errorf("failed to apply podSpec patch from k8s plugin: %w", err)
}
}

kjob.Spec.Template.Spec = *podSpec

return kjob, nil
}

func patchPodSpec(original *corev1.PodSpec, patchMap map[string]interface{}) (*corev1.PodSpec, error) {
originalJSON, err := json.Marshal(original)
if err != nil {
return nil, fmt.Errorf("error converting original to JSON: %v", err)
}

patch := &unstructured.Unstructured{Object: patchMap}
patchJSON, err := patch.MarshalJSON()
if err != nil {
return nil, fmt.Errorf("error converting patch to JSON: %v", err)
}

patchedJSON, err := strategicpatch.StrategicMergePatch(originalJSON, patchJSON, corev1.PodSpec{})
if err != nil {
return nil, fmt.Errorf("error applying strategic patch: %v", err)
}

var patchedSpec corev1.PodSpec
if err := json.Unmarshal(patchedJSON, &patchedSpec); err != nil {
return nil, fmt.Errorf("error converting patched JSON to PodSpec: %v", err)
}

return &patchedSpec, nil
}

func (w *jobWrapper) createCheckoutContainer(
kjob *batchv1.Job,
env []corev1.EnvVar,
@@ -413,7 +515,7 @@ func (w *jobWrapper) createCheckoutContainer(
Value: "0",
},
},
EnvFrom: w.k8sPlugin.GitEnvFrom,
EnvFrom: w.envFrom,
}
checkoutContainer.Env = append(checkoutContainer.Env, env...)

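Note: a rough usage sketch (not part of the diff) of how patchPodSpec behaves. StrategicMergePatch merges containers and env entries by their name keys, so the agent-level pod-spec-patch from examples/config.yaml can set serviceAccountName and add an env var to container-0 without replacing that container's image, command, or existing environment. The literal values below are invented for the example.

```go
package scheduler

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// examplePatch illustrates the strategic-merge behaviour of patchPodSpec with
// a patch equivalent to the pod-spec-patch block in examples/config.yaml
// (secretKeyRef simplified to a plain value for brevity).
func examplePatch() {
	original := &corev1.PodSpec{
		ServiceAccountName: "default",
		Containers: []corev1.Container{{
			Name:  "container-0",
			Image: "buildkite/agent:latest",
			Env:   []corev1.EnvVar{{Name: "BUILDKITE_COMMAND", Value: "make test"}},
		}},
	}

	patch := map[string]interface{}{
		"serviceAccountName": "buildkite-agent-sa",
		"containers": []interface{}{
			map[string]interface{}{
				"name": "container-0", // merge key: matches the existing container
				"env": []interface{}{
					map[string]interface{}{"name": "GITHUB_TOKEN", "value": "from-secret"},
				},
			},
		},
	}

	patched, err := patchPodSpec(original, patch)
	if err != nil {
		panic(err)
	}

	// ServiceAccountName is overridden; container-0 keeps its image and
	// BUILDKITE_COMMAND env var, and gains GITHUB_TOKEN.
	fmt.Println(patched.ServiceAccountName, len(patched.Containers), len(patched.Containers[0].Env))
}
```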