diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index b8b328aa..2df6c49a 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.3 +version: 0.13.5 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 912e9db1..15d1e754 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -148,6 +148,8 @@ spec: {{- toYaml .Values.agent.nodeSelector | nindent 8 }} tolerations: {{- toYaml .Values.agent.tolerations | nindent 8 }} + affinity: + {{- toYaml .Values.agent.affinity | nindent 8 }} --- {{- if .Capabilities.APIVersions.Has "policy/v1" }} apiVersion: policy/v1 @@ -160,4 +162,4 @@ spec: matchLabels: app: launch-agent-{{ .Release.Name }} --- -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index b39f309d..52b0a8c5 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -5,7 +5,7 @@ agent: # Providing API key can be done external to this chart useExternalWandbSecret: false # Container image to use for the agent. - image: wandb/launch-agent:0.16.6 + image: wandb/launch-agent:0.17.1 # Image pull policy for agent image. imagePullPolicy: Always # Resources block for the agent spec. @@ -21,6 +21,8 @@ agent: minAvailable: 1 # Tolerations for the agent pod. tolerations: [] + # Affinites for the agent pod. + affinity: {} # Namespace to deploy launch agent into namespace: wandb diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index b246b5d4..6cc7c094 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -29,5 +29,11 @@ dependencies: - name: flat-run-fields-updater repository: file://charts/flat-run-fields-updater version: 0.1.0 -digest: sha256:72ce111a55d35fac65edc81862f81dd1c0a6ad747aa2a6b2522966f91b27c814 -generated: "2024-03-26T20:48:42.072569696Z" +- name: nginx + repository: file://charts/nginx + version: 0.1.0 +- name: stackdriver + repository: file://charts/stackdriver + version: 0.1.0 +digest: sha256:9a6c69506deb6969686d5b220a0692b53cfa29642e059bdf27c440c5d7086bdb +generated: "2024-06-05T11:04:02.508473-07:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 2db35504..d68a4a15 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.1 +version: 0.13.14 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -52,3 +52,11 @@ dependencies: version: "*.*.*" repository: file://charts/flat-run-fields-updater condition: flat-run-fields-updater.install + - name: nginx + version: "*.*.*" + repository: file://charts/nginx + condition: nginx.install + - name: stackdriver + version: "*.*.*" + repository: file://charts/stackdriver + condition: stackdriver.install diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index 3e66dc74..c543eccc 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -158,9 +158,11 @@ spec: - name: OIDC_CLIENT_ID value: {{ .Values.global.auth.oidc.clientId }} - name: OIDC_AUTH_METHOD - value: {{ .Values.global.auth.oidc.method }} + value: {{ .Values.global.auth.oidc.authMethod }} - name: OIDC_ISSUER value: {{ .Values.global.auth.oidc.issuer }} + - name: OIDC_CLIENT_SECRET + value: {{ .Values.global.auth.oidc.secret }} {{- end }} - name: GORILLA_SESSION_LENGTH diff --git a/charts/operator-wandb/charts/nginx/.helmignore b/charts/operator-wandb/charts/nginx/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/nginx/Chart.yaml b/charts/operator-wandb/charts/nginx/Chart.yaml new file mode 100644 index 00000000..31541df3 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: nginx +type: application +description: A Helm chart for Kubernetes + +version: 0.1.0 +appVersion: "1.25.5" + +home: https://wandb.ai +icon: https://wandb.ai/logo.svg + +maintainers: + - name: wandb + email: support@wandb.com + url: https://wandb.com diff --git a/charts/operator-wandb/charts/nginx/templates/_helpers.tpl b/charts/operator-wandb/charts/nginx/templates/_helpers.tpl new file mode 100644 index 00000000..0b81fce4 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/_helpers.tpl @@ -0,0 +1,124 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "nginx.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified nginx name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "nginx.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "nginx.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "nginx.labels" -}} +helm.sh/chart: {{ include "nginx.chart" . }} +{{ include "nginx.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "nginx.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "nginx.selectorLabels" -}} +app.kubernetes.io/name: {{ include "nginx.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "nginx.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "nginx.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "nginx.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +nginx deployments. +*/}} +{{- define "nginx.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} + + +{{- define "nginx.nodeSelector" -}} +{{- $nodeSelector := default .Values.global.nodeSelector .Values.nodeSelector -}} +{{- if $nodeSelector }} +nodeSelector: + {{- toYaml $nodeSelector | nindent 2 }} +{{- end }} +{{- end -}} + + +{{/* +Return a PodSecurityContext definition. + +Usage: + {{ include "nginx.podSecurityContext" .Values.pod.securityContext }} +*/}} +{{- define "nginx.podSecurityContext" -}} +{{- $psc := . }} +{{- if $psc }} +securityContext: +{{- if not (empty $psc.runAsUser) }} + runAsUser: {{ $psc.runAsUser }} +{{- end }} +{{- if not (empty $psc.runAsGroup) }} + runAsGroup: {{ $psc.runAsGroup }} +{{- end }} +{{- if not (empty $psc.fsGroup) }} + fsGroup: {{ $psc.fsGroup }} +{{- end }} +{{- if not (empty $psc.fsGroupChangePolicy) }} + fsGroupChangePolicy: {{ $psc.fsGroupChangePolicy }} +{{- end }} +{{- end }} +{{- end -}} \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/configmap.yaml b/charts/operator-wandb/charts/nginx/templates/configmap.yaml new file mode 100644 index 00000000..72573c9e --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/configmap.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "nginx.fullname" . }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} + {{- if .Values.configMap.labels -}} + {{- toYaml .Values.configMap.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.configMap.annotations -}} + {{- toYaml .Values.configMap.annotations | nindent 4 }} + {{- end }} +data: + nginx.conf: | + worker_processes auto; + + error_log /var/log/nginx/error.log notice; + pid /tmp/nginx.pid; + + + events { + worker_connections 1024; + } + + http { + server { + listen 8080; + location / { + proxy_pass http://{{ .Release.Name }}-app:8080; + } + location /console { + proxy_pass http://{{ .Release.Name }}-console:8082; + } + } + } \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/deployment.yaml b/charts/operator-wandb/charts/nginx/templates/deployment.yaml new file mode 100644 index 00000000..8592202b --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/deployment.yaml @@ -0,0 +1,54 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + name: {{ include "nginx.fullname" . }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "nginx.labels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "nginx.commonLabels" . | nindent 8 }} + {{- include "nginx.podLabels" . | nindent 8 }} + {{- include "nginx.labels" . | nindent 8 }} + annotations: + checksum/configmap: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "nginx.serviceAccountName" . }} + {{- if .tolerations }} + tolerations: + {{- toYaml .tolerations | nindent 8 }} + {{- end }} + {{- include "nginx.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + name: {{ .Chart.Name }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + ports: + - containerPort: 8080 + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + volumes: + - name: nginx-config + configMap: + name: {{ include "nginx.fullname" . }} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/hpa.yaml b/charts/operator-wandb/charts/nginx/templates/hpa.yaml new file mode 100644 index 00000000..14a97231 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/hpa.yaml @@ -0,0 +1,22 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "nginx.fullname" . }} + namespace: {{ $.Release.Namespace }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "nginx.fullname" . }} + minReplicas: 1 + maxReplicas: 1 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/service.yaml b/charts/operator-wandb/charts/nginx/templates/service.yaml new file mode 100644 index 00000000..b037cdd0 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "nginx.fullname" . }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - protocol: TCP + port: 80 + targetPort: 8080 + selector: + {{- include "nginx.labels" . | nindent 4 }} diff --git a/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml b/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml new file mode 100644 index 00000000..9add01ef --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "nginx.serviceAccountName" . }} + namespace: {{ $.Release.Namespace }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} diff --git a/charts/operator-wandb/charts/nginx/values.yaml b/charts/operator-wandb/charts/nginx/values.yaml new file mode 100644 index 00000000..37805c8e --- /dev/null +++ b/charts/operator-wandb/charts/nginx/values.yaml @@ -0,0 +1,48 @@ +enabled: true +nameOverride: "" +fullnameOverride: "" + +image: + repository: nginxinc/nginx-unprivileged + tag: latest + pullPolicy: Always + # pullSecrets: [] + +# Tolerations for pod scheduling +tolerations: [] + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + annotations: {} + +common: + labels: {} + +deployment: + labels: {} + annotations: {} + +service: + type: ClusterIP + annotations: {} + labels: {} + +resources: + # We usually recommend not to specify default resources and to leave this as a + # conscious choice for the user. This also increases chances charts run on + # environments with little resources, such as Minikube. If you do want to + # specify resources, uncomment the following lines, adjust them as necessary, + # and remove the curly braces after 'resources:'. + requests: + cpu: 100m + memory: 1Gi + limits: + cpu: 4000m + memory: 8Gi + +serviceAccount: + create: true +configMap: {} \ No newline at end of file diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 58c34b8f..4d082e29 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -43,12 +43,6 @@ spec: containers: - name: backfill-job image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - volumeMounts: - {{- if ne (include "wandb.redis.caCert" .) "" }} - - name: {{ include "parquet.fullname" . }}-redis-ca - mountPath: /etc/ssl/certs/redis_ca.pem - subPath: redis_ca.pem - {{- end }} command: [ "/sbin/my_init", "--skip-runit", @@ -134,7 +128,6 @@ spec: valueFrom: fieldRef: fieldPath: status.hostIP - {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} restartPolicy: Never diff --git a/charts/operator-wandb/charts/stackdriver/.helmignore b/charts/operator-wandb/charts/stackdriver/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/stackdriver/Chart.yaml b/charts/operator-wandb/charts/stackdriver/Chart.yaml new file mode 100644 index 00000000..0bfa3581 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: stackdriver +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "0.15.0" diff --git a/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl b/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl new file mode 100644 index 00000000..3b833602 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl @@ -0,0 +1,101 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "stackdriver.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "stackdriver.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "stackdriver.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "stackdriver.labels" -}} +helm.sh/chart: {{ include "stackdriver.chart" . }} +{{ include "stackdriver.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "stackdriver.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "stackdriver.selectorLabels" -}} +app.kubernetes.io/name: {{ include "stackdriver.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "stackdriver.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "stackdriver.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. +*/}} +{{- define "stackdriver.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "stackdriver.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. +*/}} +{{- define "stackdriver.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} diff --git a/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml b/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml new file mode 100644 index 00000000..6e5ee2a4 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml @@ -0,0 +1,123 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "stackdriver.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "stackdriver.labels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "stackdriver.commonLabels" . | nindent 8 }} + {{- include "stackdriver.podLabels" . | nindent 8 }} + {{- include "stackdriver.labels" . | nindent 8 }} + annotations: + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: ["stackdriver_exporter"] + volumeMounts: + {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }} + - name: stackdriver-service-account + mountPath: /etc/secrets/service-account/ + {{- end}} + args: + - --google.project-id={{ .Values.stackdriver.projectId }} + - --monitoring.metrics-interval={{ .Values.stackdriver.metrics.interval }} + - --monitoring.metrics-offset={{ .Values.stackdriver.metrics.offset }} + - --monitoring.metrics-type-prefixes={{ .Values.stackdriver.metrics.typePrefixes | replace " " "" }} + {{- range .Values.stackdriver.metrics.filters }} + - --monitoring.filters={{ . }} + {{- end }} + - --stackdriver.backoff-jitter={{ .Values.stackdriver.backoffJitter }} + - --stackdriver.http-timeout={{ .Values.stackdriver.httpTimeout }} + - --stackdriver.max-backoff={{ .Values.stackdriver.maxBackoff }} + - --stackdriver.max-retries={{ .Values.stackdriver.maxRetries }} + - --stackdriver.retry-statuses={{ .Values.stackdriver.retryStatuses }} + - --web.listen-address={{ .Values.web.listenAddress }} + - --web.telemetry-path={{ .Values.web.path }} + {{- if .Values.stackdriver.dropDelegatedProjects }} + - --monitoring.drop-delegated-projects + {{- end }} + {{- if .Values.stackdriver.metrics.ingestDelay }} + - --monitoring.metrics-ingest-delay + {{- end }} + {{- if .Values.stackdriver.metrics.aggregateDeltas }} + - --monitoring.aggregate-deltas + - --monitoring.aggregate-deltas-ttl={{ .Values.stackdriver.metrics.aggregateDeltasTTL }} + {{- end }} + {{- if .Values.extraArgs }} + {{- range $key, $value := .Values.extraArgs }} + {{- if $value }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- end }} + {{- end }} + {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }} + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /etc/secrets/service-account/credentials.json + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + ports: + - containerPort: {{ .Values.service.httpPort }} + name: http + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + timeoutSeconds: 10 + restartPolicy: {{ .Values.restartPolicy }} + serviceAccountName: {{ include "stackdriver.serviceAccountName" . }} + {{- if .tolerations }} + tolerations: + {{- toYaml .tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + volumes: + {{- if .Values.stackdriver.serviceAccountSecret }} + - name: stackdriver-service-account + secret: + secretName: {{ .Values.stackdriver.serviceAccountSecret | quote }} + {{- if and (.Values.stackdriver.serviceAccountSecret) (.Values.stackdriver.serviceAccountSecretKey) }} + items: + - key: {{ .Values.stackdriver.serviceAccountSecretKey | quote }} + path: credentials.json + {{- end }} + {{- else if .Values.stackdriver.serviceAccountKey }} + - name: stackdriver-service-account + secret: + secretName: {{ template "stackdriver-exporter.fullname" . }} + {{- end}} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/templates/service.yaml b/charts/operator-wandb/charts/stackdriver/templates/service.yaml new file mode 100644 index 00000000..75915f03 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "stackdriver.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 9255 + protocol: TCP + name: stackdriver + selector: + {{- include "stackdriver.labels" . | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml b/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml new file mode 100644 index 00000000..e4638ff2 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "stackdriver.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/values.yaml b/charts/operator-wandb/charts/stackdriver/values.yaml new file mode 100644 index 00000000..4a248f93 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/values.yaml @@ -0,0 +1,102 @@ +enabled: true + +nameOverride: "" +fullnameOverride: "" + +image: + repository: prometheuscommunity/stackdriver-exporter + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: latest + # pullSecrets: [] + +# Tolerations for pod scheduling +tolerations: [] + +restartPolicy: Always +replicaCount: 1 + +extraEnv: {} + +extraEnvFrom: {} + +extraArgs: {} + +stackdriver: + # The Google Project ID to gather metrics for + projectId: "FALSE" + # An existing secret which contains credentials.json + serviceAccountSecret: "" + # Provide custom key for the existing secret to load credentials.json from + serviceAccountSecretKey: "" + # A service account key JSON file. Must be provided when no existing secret is used, in this case a new secret will be created holding this service account + serviceAccountKey: "" + # Max number of retries that should be attempted on 503 errors from Stackdriver + maxRetries: 0 + # How long should Stackdriver_exporter wait for a result from the Stackdriver API + httpTimeout: 10s + # Max time between each request in an exp backoff scenario + maxBackoff: 5s + # The amount of jitter to introduce in an exp backoff scenario + backoffJitter: 1s + # The HTTP statuses that should trigger a retry + retryStatuses: 503 + # Drop metrics from attached projects and fetch `project_id` only + dropDelegatedProjects: false + metrics: + # The prefixes to gather metrics for, we default to just CPU metrics. + typePrefixes: 'cloudsql.googleapis.com/database,redis' + # The filters to refine the metrics query by using Filter objects that Google provides. + # Filter objects: project, group.id, resource.type, resource.labels.[KEY], metric.type, metric.labels.[KEY] + # https://cloud.google.com/monitoring/api/v3/filters + filters: [] + # - 'pubsub.googleapis.com/subscription:resource.labels.subscription_id=monitoring.regex.full_match("us-west4.*my-team.*")' + # The frequency to request + interval: '5m' + # How far into the past to offset + offset: '0s' + # Offset for the Google Stackdriver Monitoring Metrics interval into the past by the ingest delay from the metric's metadata. + ingestDelay: false + # If enabled will treat all DELTA metrics as an in-memory counter instead of a gauge. + aggregateDeltas: false + # How long should a delta metric continue to be exported after GCP stops producing a metric + aggregateDeltasTTL: '30m' + +web: + # Port to listen on + listenAddress: ':9255' + # Path under which to expose metrics. + path: /metrics + +secret: + labels: {} +customLabels: {} + # app: prometheus-stackdriver-exporter + +service: + type: ClusterIP + httpPort: 9255 + annotations: {} + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + +deployment: {} + +serviceAccount: + create: true + +common: + labels: {} + annotations: {} + +resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 500m + memory: 500Mi \ No newline at end of file diff --git a/charts/operator-wandb/templates/ingress.yaml b/charts/operator-wandb/templates/ingress.yaml index ad7ca341..44a184c2 100644 --- a/charts/operator-wandb/templates/ingress.yaml +++ b/charts/operator-wandb/templates/ingress.yaml @@ -1,4 +1,17 @@ {{- if .Values.ingress.issuer.create }} +{{- if eq .Values.ingress.issuer.provider "google" }} +--- +apiVersion: networking.gke.io/v1 +kind: ManagedCertificate +metadata: + name: {{ .Release.Name }}-cert + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} +spec: + domains: + - {{ replace "https://" "" (replace "http://" "" .Values.global.host) }} +{{- else }} +--- apiVersion: cert-manager.io/v1 kind: Issuer metadata: @@ -20,6 +33,7 @@ spec: ingress: class: {{ .Values.ingress.class }} {{- end }} +{{- end }} {{- if .Values.ingress.install }} --- {{- if .Values.ingress.create }} @@ -34,8 +48,12 @@ metadata: {{- end }} annotations: {{- if .Values.ingress.issuer.create }} + {{- if eq .Values.ingress.issuer.provider "google" }} + "networking.gke.io/managed-certificates" : "{{ .Release.Name }}-cert" + {{- else }} "cert-manager.io/issuer": "{{ .Release.Name }}-issuer" "cert-manager.io/acme-challenge-type": "http01" + {{- end }} "kubernetes.io/ingress.allow-http" : "false" {{- end }} @@ -47,9 +65,11 @@ spec: {{- $defaultHost := include "defaultHost" . -}} {{- if .Values.ingress.issuer.create }} + {{- if ne .Values.ingress.issuer.provider "google" }} tls: - host: [{{ $defaultHost }}] secretName: {{ .Release.Name }}-tls + {{- end }} {{- else }} tls: {{ toYaml .Values.ingress.tls | nindent 4 }} {{- end }} @@ -65,7 +85,7 @@ spec: {{- end }} {{- end }} --- -{{- if .Values.ingress.secondary.create }} +{{- if and .Values.ingress.secondary .Values.ingress.secondary.create }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index be2c1a5b..a5687855 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -145,6 +145,9 @@ app: repository: wandb/local tag: latest +nginx: + install: false + weave: install: true image: @@ -164,7 +167,7 @@ flat-run-fields-updater: tag: latest mysql: - install: true + install: false persistence: size: 20Gi storageClass: "" @@ -195,6 +198,15 @@ prometheus: mysql-exporter: install: true +stackdriver: + install: true + pod: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9255" + prometheus.io/path: "/metrics" + prometheus.io/scheme: http + otel: install: true diff --git a/charts/operator/templates/deployment.yaml b/charts/operator/templates/deployment.yaml index 41ea132b..d72ba7b4 100644 --- a/charts/operator/templates/deployment.yaml +++ b/charts/operator/templates/deployment.yaml @@ -67,7 +67,7 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=10 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.5.0 + image: {{ .Values.rbacProxy.image.repository }}:{{ .Values.rbacProxy.image.tag }} name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/charts/operator/values.yaml b/charts/operator/values.yaml index 7b77892d..30e80bfc 100644 --- a/charts/operator/values.yaml +++ b/charts/operator/values.yaml @@ -27,4 +27,15 @@ manager: serviceAccount: create: true +clusterRole: + rules: + - apiGroups: ["*"] + resources: ["*"] + verbs: ["*"] + +rbacProxy: + image: + repository: gcr.io/kubebuilder/kube-rbac-proxy + tag: v0.5.0 + airgapped: false