From e448bd2b3688465b7323d7a4df435c7289afce79 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Wed, 22 May 2024 13:17:06 -0700 Subject: [PATCH 01/16] fix: Add empty field for attributes (#130) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/otel/charts/agent/templates/_config.tpl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 3d1354901..2db35504d 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.0 +version: 0.13.1 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl b/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl index c6ee0876a..9c7dea59d 100644 --- a/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl +++ b/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl @@ -48,6 +48,7 @@ extensions: {{- define "otelAgent.processors" -}} processors: batch: {} + attributes: {} memory_limiter: check_interval: 5s limit_percentage: 80 From e51ea437c9b6c4194d77ede0e5406c7f29cdb8ba Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Wed, 22 May 2024 13:25:40 -0700 Subject: [PATCH 02/16] feat: Option to create cert for gce (#132) Co-authored-by: Justin Brooks --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/templates/ingress.yaml | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 2db35504d..0cb1f614e 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb 
description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.1 +version: 0.13.2 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/templates/ingress.yaml b/charts/operator-wandb/templates/ingress.yaml index ad7ca3413..e6d5102d2 100644 --- a/charts/operator-wandb/templates/ingress.yaml +++ b/charts/operator-wandb/templates/ingress.yaml @@ -1,4 +1,17 @@ {{- if .Values.ingress.issuer.create }} +{{- if eq .Values.ingress.issuer.provider "google" }} +--- +apiVersion: networking.gke.io/v1 +kind: ManagedCertificate +metadata: + name: {{ .Release.Name }}-cert + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} +spec: + domains: + - {{ replace "https://" "" (replace "http://" "" .Values.global.host) }} +{{- else }} +--- apiVersion: cert-manager.io/v1 kind: Issuer metadata: @@ -20,6 +33,7 @@ spec: ingress: class: {{ .Values.ingress.class }} {{- end }} +{{- end }} {{- if .Values.ingress.install }} --- {{- if .Values.ingress.create }} @@ -34,8 +48,12 @@ metadata: {{- end }} annotations: {{- if .Values.ingress.issuer.create }} + {{- if eq .Values.ingress.issuer.provider "google" }} + "networking.gke.io/managed-certificates" : "{{ .Release.Name }}-cert" + {{- else }} "cert-manager.io/issuer": "{{ .Release.Name }}-issuer" "cert-manager.io/acme-challenge-type": "http01" + {{- end }} "kubernetes.io/ingress.allow-http" : "false" {{- end }} @@ -47,9 +65,11 @@ spec: {{- $defaultHost := include "defaultHost" . 
-}} {{- if .Values.ingress.issuer.create }} + {{- if ne .Values.ingress.issuer.provider "google" }} tls: - host: [{{ $defaultHost }}] secretName: {{ .Release.Name }}-tls + {{- end }} {{- else }} tls: {{ toYaml .Values.ingress.tls | nindent 4 }} {{- end }} From fb96aa6143fe2fd0f4de8e0a828f254901e28423 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Wed, 22 May 2024 16:43:05 -0700 Subject: [PATCH 03/16] fix: Check if secondary is nil (#134) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/templates/ingress.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 0cb1f614e..be1ff7dfc 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.2 +version: 0.13.3 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/templates/ingress.yaml b/charts/operator-wandb/templates/ingress.yaml index e6d5102d2..44a184c2a 100644 --- a/charts/operator-wandb/templates/ingress.yaml +++ b/charts/operator-wandb/templates/ingress.yaml @@ -85,7 +85,7 @@ spec: {{- end }} {{- end }} --- -{{- if .Values.ingress.secondary.create }} +{{- if and .Values.ingress.secondary .Values.ingress.secondary.create }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: From f0905a6c29c53bafa4f2349b5cb5ef7645c50bc4 Mon Sep 17 00:00:00 2001 From: Ramit Goolry <93002515+ramit-wandb@users.noreply.github.com> Date: Thu, 23 May 2024 05:24:34 +0530 Subject: [PATCH 04/16] revert: fix: mount redis ca cert to backfiller (#133) Co-authored-by: Aditya Choudhari --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/parquet/templates/cron.yaml | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git 
a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index be1ff7dfc..3a45a4ea0 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.3 +version: 0.13.4 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 58c34b8f3..f6ccab09f 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -43,12 +43,6 @@ spec: containers: - name: backfill-job image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - volumeMounts: - {{- if ne (include "wandb.redis.caCert" .) "" }} - - name: {{ include "parquet.fullname" . }}-redis-ca - mountPath: /etc/ssl/certs/redis_ca.pem - subPath: redis_ca.pem - {{- end }} command: [ "/sbin/my_init", "--skip-runit", From 82eca4b004b3aa2a6ad9793029fde6b9f6a17a44 Mon Sep 17 00:00:00 2001 From: Ramit Goolry <93002515+ramit-wandb@users.noreply.github.com> Date: Wed, 29 May 2024 00:32:10 +0530 Subject: [PATCH 05/16] fix: Mount Redis CA Cert (#136) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/parquet/templates/cron.yaml | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 3a45a4ea0..4b5198e6c 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.4 +version: 0.13.5 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index f6ccab09f..787f23656 
100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -49,6 +49,12 @@ spec: "megabinary", "glue", ] + volumeMounts: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + mountPath: /etc/ssl/certs/redis_ca.pem + subPath: redis_ca.pem + {{- end }} env: - name: GORILLA_GLUE_EXECUTE value: "true" @@ -128,7 +134,15 @@ spec: valueFrom: fieldRef: fieldPath: status.hostIP - + volumes: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + secret: + secretName: "{{ include "wandb.redis.passwordSecret" . }}" + items: + - key: REDIS_CA_CERT + path: /etc/ssl/certs/redis_ca.pem + {{- end }} {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} restartPolicy: Never From c309b9519d3b9ed25b19a9c3b7890129e828699d Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Tue, 28 May 2024 14:06:35 -0700 Subject: [PATCH 06/16] fix: Use relative path for redis volume (#137) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/parquet/templates/cron.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 4b5198e6c..dd8057429 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.5 +version: 0.13.6 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 787f23656..6363aee34 100644 --- 
a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -141,7 +141,7 @@ spec: secretName: "{{ include "wandb.redis.passwordSecret" . }}" items: - key: REDIS_CA_CERT - path: /etc/ssl/certs/redis_ca.pem + path: redis_ca.pem {{- end }} {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} From b512d95ea732a4c6d8af1c06b326b7cedd587670 Mon Sep 17 00:00:00 2001 From: Ramit Goolry <93002515+ramit-wandb@users.noreply.github.com> Date: Wed, 29 May 2024 04:42:01 +0530 Subject: [PATCH 07/16] revert: fix: mount redis CA (#138) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/parquet/templates/cron.yaml | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index dd8057429..6f513a60a 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.6 +version: 0.13.7 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 6363aee34..4d082e296 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -49,12 +49,6 @@ spec: "megabinary", "glue", ] - volumeMounts: - {{- if ne (include "wandb.redis.caCert" .) "" }} - - name: {{ include "parquet.fullname" . }}-redis-ca - mountPath: /etc/ssl/certs/redis_ca.pem - subPath: redis_ca.pem - {{- end }} env: - name: GORILLA_GLUE_EXECUTE value: "true" @@ -134,15 +128,6 @@ spec: valueFrom: fieldRef: fieldPath: status.hostIP - volumes: - {{- if ne (include "wandb.redis.caCert" .) 
"" }} - - name: {{ include "parquet.fullname" . }}-redis-ca - secret: - secretName: "{{ include "wandb.redis.passwordSecret" . }}" - items: - - key: REDIS_CA_CERT - path: redis_ca.pem - {{- end }} {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} restartPolicy: Never From 1ea5c6eeeefc07cf08fc3fcea2d7615c8bc2560e Mon Sep 17 00:00:00 2001 From: Anthony Rabbito Date: Wed, 29 May 2024 14:08:15 -0400 Subject: [PATCH 08/16] feat: add affinity template into agent (#126) Signed-off-by: Anthony Rabbito Co-authored-by: Ben Sherman --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/templates/deployment.yaml | 4 +++- charts/launch-agent/values.yaml | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index b8b328aa6..1049e168e 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.3 +version: 0.13.4 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 912e9db14..15d1e7540 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -148,6 +148,8 @@ spec: {{- toYaml .Values.agent.nodeSelector | nindent 8 }} tolerations: {{- toYaml .Values.agent.tolerations | nindent 8 }} + affinity: + {{- toYaml .Values.agent.affinity | nindent 8 }} --- {{- if .Capabilities.APIVersions.Has "policy/v1" }} apiVersion: policy/v1 @@ -160,4 +162,4 @@ spec: matchLabels: app: launch-agent-{{ .Release.Name }} --- -{{- end }} \ No newline at end of file +{{- end }} diff --git 
a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index b39f309de..529a6789b 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -21,6 +21,8 @@ agent: minAvailable: 1 # Tolerations for the agent pod. tolerations: [] + # Affinities for the agent pod. + affinity: {} # Namespace to deploy launch agent into namespace: wandb From a7060b6f5467ae87a4f1118f9a4bb00013eeab10 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Wed, 5 Jun 2024 22:53:49 +0530 Subject: [PATCH 09/16] feat: added nginx support (#128) Co-authored-by: amanpruthi --- charts/operator-wandb/Chart.lock | 7 +- charts/operator-wandb/Chart.yaml | 6 +- .../operator-wandb/charts/nginx/.helmignore | 23 ++++ charts/operator-wandb/charts/nginx/Chart.yaml | 15 +++ .../charts/nginx/templates/_helpers.tpl | 124 ++++++++++++++++++ .../charts/nginx/templates/configmap.yaml | 37 ++++++ .../charts/nginx/templates/deployment.yaml | 54 ++++++++ .../charts/nginx/templates/hpa.yaml | 22 ++++ .../charts/nginx/templates/service.yaml | 22 ++++ .../nginx/templates/serviceaccount.yaml | 14 ++ .../operator-wandb/charts/nginx/values.yaml | 48 +++++++ charts/operator-wandb/values.yaml | 3 + 12 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 charts/operator-wandb/charts/nginx/.helmignore create mode 100644 charts/operator-wandb/charts/nginx/Chart.yaml create mode 100644 charts/operator-wandb/charts/nginx/templates/_helpers.tpl create mode 100644 charts/operator-wandb/charts/nginx/templates/configmap.yaml create mode 100644 charts/operator-wandb/charts/nginx/templates/deployment.yaml create mode 100644 charts/operator-wandb/charts/nginx/templates/hpa.yaml create mode 100644 charts/operator-wandb/charts/nginx/templates/service.yaml create mode 100644 charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml create mode 100644 charts/operator-wandb/charts/nginx/values.yaml diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index 
b246b5d46..e0e233a29 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -29,5 +29,8 @@ dependencies: - name: flat-run-fields-updater repository: file://charts/flat-run-fields-updater version: 0.1.0 -digest: sha256:72ce111a55d35fac65edc81862f81dd1c0a6ad747aa2a6b2522966f91b27c814 -generated: "2024-03-26T20:48:42.072569696Z" +- name: nginx + repository: file://charts/nginx + version: 0.1.0 +digest: sha256:c6f5bb38d125207b54902f5638451b55807074ea700ff8eea4ea0af2dc19e346 +generated: "2024-05-17T16:55:33.068309+05:30" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 6f513a60a..c11dbe198 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.7 +version: 0.13.8 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -52,3 +52,7 @@ dependencies: version: "*.*.*" repository: file://charts/flat-run-fields-updater condition: flat-run-fields-updater.install + - name: nginx + version: "*.*.*" + repository: file://charts/nginx + condition: nginx.install diff --git a/charts/operator-wandb/charts/nginx/.helmignore b/charts/operator-wandb/charts/nginx/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/nginx/Chart.yaml b/charts/operator-wandb/charts/nginx/Chart.yaml new file mode 100644 index 000000000..31541df3a --- /dev/null +++ b/charts/operator-wandb/charts/nginx/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: nginx +type: application +description: A Helm chart for Kubernetes + +version: 0.1.0 +appVersion: "1.25.5" + +home: https://wandb.ai +icon: https://wandb.ai/logo.svg + +maintainers: + - name: wandb + email: support@wandb.com + url: https://wandb.com diff --git a/charts/operator-wandb/charts/nginx/templates/_helpers.tpl b/charts/operator-wandb/charts/nginx/templates/_helpers.tpl new file mode 100644 index 000000000..0b81fce45 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/_helpers.tpl @@ -0,0 +1,124 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "nginx.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified nginx name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "nginx.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "nginx.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "nginx.labels" -}} +helm.sh/chart: {{ include "nginx.chart" . }} +{{ include "nginx.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "nginx.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "nginx.selectorLabels" -}} +app.kubernetes.io/name: {{ include "nginx.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "nginx.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "nginx.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "nginx.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +nginx deployments. +*/}} +{{- define "nginx.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} + + +{{- define "nginx.nodeSelector" -}} +{{- $nodeSelector := default .Values.global.nodeSelector .Values.nodeSelector -}} +{{- if $nodeSelector }} +nodeSelector: + {{- toYaml $nodeSelector | nindent 2 }} +{{- end }} +{{- end -}} + + +{{/* +Return a PodSecurityContext definition. 
+ +Usage: + {{ include "nginx.podSecurityContext" .Values.pod.securityContext }} +*/}} +{{- define "nginx.podSecurityContext" -}} +{{- $psc := . }} +{{- if $psc }} +securityContext: +{{- if not (empty $psc.runAsUser) }} + runAsUser: {{ $psc.runAsUser }} +{{- end }} +{{- if not (empty $psc.runAsGroup) }} + runAsGroup: {{ $psc.runAsGroup }} +{{- end }} +{{- if not (empty $psc.fsGroup) }} + fsGroup: {{ $psc.fsGroup }} +{{- end }} +{{- if not (empty $psc.fsGroupChangePolicy) }} + fsGroupChangePolicy: {{ $psc.fsGroupChangePolicy }} +{{- end }} +{{- end }} +{{- end -}} \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/configmap.yaml b/charts/operator-wandb/charts/nginx/templates/configmap.yaml new file mode 100644 index 000000000..72573c9ee --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/configmap.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "nginx.fullname" . }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . 
| nindent 4 }} + {{- if .Values.configMap.labels -}} + {{- toYaml .Values.configMap.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.configMap.annotations -}} + {{- toYaml .Values.configMap.annotations | nindent 4 }} + {{- end }} +data: + nginx.conf: | + worker_processes auto; + + error_log /var/log/nginx/error.log notice; + pid /tmp/nginx.pid; + + + events { + worker_connections 1024; + } + + http { + server { + listen 8080; + location / { + proxy_pass http://{{ .Release.Name }}-app:8080; + } + location /console { + proxy_pass http://{{ .Release.Name }}-console:8082; + } + } + } \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/deployment.yaml b/charts/operator-wandb/charts/nginx/templates/deployment.yaml new file mode 100644 index 000000000..8592202ba --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/deployment.yaml @@ -0,0 +1,54 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + name: {{ include "nginx.fullname" . }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "nginx.labels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "nginx.commonLabels" . | nindent 8 }} + {{- include "nginx.podLabels" . | nindent 8 }} + {{- include "nginx.labels" . | nindent 8 }} + annotations: + checksum/configmap: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "nginx.serviceAccountName" . 
}} + {{- if .tolerations }} + tolerations: + {{- toYaml .tolerations | nindent 8 }} + {{- end }} + {{- include "nginx.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + name: {{ .Chart.Name }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + ports: + - containerPort: 8080 + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + volumes: + - name: nginx-config + configMap: + name: {{ include "nginx.fullname" . }} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/hpa.yaml b/charts/operator-wandb/charts/nginx/templates/hpa.yaml new file mode 100644 index 000000000..14a972319 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/hpa.yaml @@ -0,0 +1,22 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "nginx.fullname" . }} + namespace: {{ $.Release.Namespace }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "nginx.fullname" . }} + minReplicas: 1 + maxReplicas: 1 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 \ No newline at end of file diff --git a/charts/operator-wandb/charts/nginx/templates/service.yaml b/charts/operator-wandb/charts/nginx/templates/service.yaml new file mode 100644 index 000000000..b037cdd04 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "nginx.fullname" . }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- include "nginx.labels" . 
| nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - protocol: TCP + port: 80 + targetPort: 8080 + selector: + {{- include "nginx.labels" . | nindent 4 }} diff --git a/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml b/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml new file mode 100644 index 000000000..9add01ef0 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "nginx.serviceAccountName" . }} + namespace: {{ $.Release.Namespace }} + labels: + {{- include "nginx.commonLabels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} diff --git a/charts/operator-wandb/charts/nginx/values.yaml b/charts/operator-wandb/charts/nginx/values.yaml new file mode 100644 index 000000000..37805c8e5 --- /dev/null +++ b/charts/operator-wandb/charts/nginx/values.yaml @@ -0,0 +1,48 @@ +enabled: true +nameOverride: "" +fullnameOverride: "" + +image: + repository: nginxinc/nginx-unprivileged + tag: latest + pullPolicy: Always + # pullSecrets: [] + +# Tolerations for pod scheduling +tolerations: [] + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + annotations: {} + +common: + labels: {} + +deployment: + labels: {} + annotations: {} + +service: + type: ClusterIP + annotations: {} + labels: {} + +resources: + # We usually recommend not to specify default resources and to leave this as a + # conscious choice for the user. 
This also increases chances charts run on + # environments with little resources, such as Minikube. If you do want to + # specify resources, uncomment the following lines, adjust them as necessary, + # and remove the curly braces after 'resources:'. + requests: + cpu: 100m + memory: 1Gi + limits: + cpu: 4000m + memory: 8Gi + +serviceAccount: + create: true +configMap: {} \ No newline at end of file diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index be2c1a5b3..55748575f 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -145,6 +145,9 @@ app: repository: wandb/local tag: latest +nginx: + install: true + weave: install: true image: From 66db270edd502b9d3620eb13f305fa2452af02e1 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:43:39 -0700 Subject: [PATCH 10/16] fix: Do not install nginx by default (#144) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index c11dbe198..fd644fa0b 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.8 +version: 0.13.9 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 55748575f..18046f719 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -146,7 +146,7 @@ app: tag: latest nginx: - install: true + install: false weave: install: true From f88e87b29aa71b473a2ac43622a3840b76977411 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Wed, 5 Jun 2024 23:40:32 +0530 Subject: [PATCH 11/16] feat: added stackdriver support for gcp metrics 
(#141) Co-authored-by: Aditya Choudhari --- charts/operator-wandb/Chart.lock | 7 +- charts/operator-wandb/Chart.yaml | 6 +- .../charts/stackdriver/.helmignore | 23 ++++ .../charts/stackdriver/Chart.yaml | 5 + .../charts/stackdriver/templates/_helpers.tpl | 101 ++++++++++++++ .../stackdriver/templates/deployment.yaml | 123 ++++++++++++++++++ .../charts/stackdriver/templates/service.yaml | 23 ++++ .../stackdriver/templates/serviceaccount.yaml | 15 +++ .../charts/stackdriver/values.yaml | 102 +++++++++++++++ charts/operator-wandb/values.yaml | 9 ++ 10 files changed, 411 insertions(+), 3 deletions(-) create mode 100644 charts/operator-wandb/charts/stackdriver/.helmignore create mode 100644 charts/operator-wandb/charts/stackdriver/Chart.yaml create mode 100644 charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl create mode 100644 charts/operator-wandb/charts/stackdriver/templates/deployment.yaml create mode 100644 charts/operator-wandb/charts/stackdriver/templates/service.yaml create mode 100644 charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml create mode 100644 charts/operator-wandb/charts/stackdriver/values.yaml diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index e0e233a29..6cc7c0945 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -32,5 +32,8 @@ dependencies: - name: nginx repository: file://charts/nginx version: 0.1.0 -digest: sha256:c6f5bb38d125207b54902f5638451b55807074ea700ff8eea4ea0af2dc19e346 -generated: "2024-05-17T16:55:33.068309+05:30" +- name: stackdriver + repository: file://charts/stackdriver + version: 0.1.0 +digest: sha256:9a6c69506deb6969686d5b220a0692b53cfa29642e059bdf27c440c5d7086bdb +generated: "2024-06-05T11:04:02.508473-07:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index fd644fa0b..6dcf845f1 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ 
apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.9 +version: 0.13.10 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -56,3 +56,7 @@ dependencies: version: "*.*.*" repository: file://charts/nginx condition: nginx.install + - name: stackdriver + version: "*.*.*" + repository: file://charts/stackdriver + condition: stackdriver.install diff --git a/charts/operator-wandb/charts/stackdriver/.helmignore b/charts/operator-wandb/charts/stackdriver/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/stackdriver/Chart.yaml b/charts/operator-wandb/charts/stackdriver/Chart.yaml new file mode 100644 index 000000000..0bfa35817 --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: stackdriver +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "0.15.0" diff --git a/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl b/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl new file mode 100644 index 000000000..3b833602e --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl @@ -0,0 +1,101 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "stackdriver.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "stackdriver.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "stackdriver.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "stackdriver.labels" -}} +helm.sh/chart: {{ include "stackdriver.chart" . }} +{{ include "stackdriver.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "stackdriver.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "stackdriver.selectorLabels" -}} +app.kubernetes.io/name: {{ include "stackdriver.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "stackdriver.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "stackdriver.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. 
+*/}} +{{- define "stackdriver.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "stackdriver.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. +*/}} +{{- define "stackdriver.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} diff --git a/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml b/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml new file mode 100644 index 000000000..6e5ee2a4f --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/deployment.yaml @@ -0,0 +1,123 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "stackdriver.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "stackdriver.labels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "wandb.podLabels" . 
| nindent 8 }} + {{- include "stackdriver.commonLabels" . | nindent 8 }} + {{- include "stackdriver.podLabels" . | nindent 8 }} + {{- include "stackdriver.labels" . | nindent 8 }} + annotations: + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: ["stackdriver_exporter"] + volumeMounts: + {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }} + - name: stackdriver-service-account + mountPath: /etc/secrets/service-account/ + {{- end}} + args: + - --google.project-id={{ .Values.stackdriver.projectId }} + - --monitoring.metrics-interval={{ .Values.stackdriver.metrics.interval }} + - --monitoring.metrics-offset={{ .Values.stackdriver.metrics.offset }} + - --monitoring.metrics-type-prefixes={{ .Values.stackdriver.metrics.typePrefixes | replace " " "" }} + {{- range .Values.stackdriver.metrics.filters }} + - --monitoring.filters={{ . 
}} + {{- end }} + - --stackdriver.backoff-jitter={{ .Values.stackdriver.backoffJitter }} + - --stackdriver.http-timeout={{ .Values.stackdriver.httpTimeout }} + - --stackdriver.max-backoff={{ .Values.stackdriver.maxBackoff }} + - --stackdriver.max-retries={{ .Values.stackdriver.maxRetries }} + - --stackdriver.retry-statuses={{ .Values.stackdriver.retryStatuses }} + - --web.listen-address={{ .Values.web.listenAddress }} + - --web.telemetry-path={{ .Values.web.path }} + {{- if .Values.stackdriver.dropDelegatedProjects }} + - --monitoring.drop-delegated-projects + {{- end }} + {{- if .Values.stackdriver.metrics.ingestDelay }} + - --monitoring.metrics-ingest-delay + {{- end }} + {{- if .Values.stackdriver.metrics.aggregateDeltas }} + - --monitoring.aggregate-deltas + - --monitoring.aggregate-deltas-ttl={{ .Values.stackdriver.metrics.aggregateDeltasTTL }} + {{- end }} + {{- if .Values.extraArgs }} + {{- range $key, $value := .Values.extraArgs }} + {{- if $value }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- end }} + {{- end }} + {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }} + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /etc/secrets/service-account/credentials.json + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + ports: + - containerPort: {{ .Values.service.httpPort }} + name: http + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + timeoutSeconds: 10 + restartPolicy: {{ .Values.restartPolicy }} + serviceAccountName: {{ include "stackdriver.serviceAccountName" . }} + {{- if .Values.tolerations }}{{/* read from chart values: the root context has no .tolerations key, so the old check never rendered this block */}} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" .
| nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + volumes: + {{- if .Values.stackdriver.serviceAccountSecret }} + - name: stackdriver-service-account + secret: + secretName: {{ .Values.stackdriver.serviceAccountSecret | quote }} + {{- if and (.Values.stackdriver.serviceAccountSecret) (.Values.stackdriver.serviceAccountSecretKey) }} + items: + - key: {{ .Values.stackdriver.serviceAccountSecretKey | quote }} + path: credentials.json + {{- end }} + {{- else if .Values.stackdriver.serviceAccountKey }} + - name: stackdriver-service-account + secret: + secretName: {{ include "stackdriver.fullname" . }}{{/* uses this chart's own fullname helper; "stackdriver-exporter.fullname" is not defined here. NOTE(review): this patch adds no Secret template for serviceAccountKey - confirm a secret with this name is provisioned elsewhere */}} + {{- end}} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/templates/service.yaml b/charts/operator-wandb/charts/stackdriver/templates/service.yaml new file mode 100644 index 000000000..75915f03b --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "stackdriver.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 9255 + protocol: TCP + name: stackdriver + selector: + {{- include "stackdriver.labels" .
| nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml b/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml new file mode 100644 index 000000000..e4638ff2f --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create }}{{/* only render when the chart owns the account; with create=false the name helper resolves to "default", which must not be re-created */}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "stackdriver.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "stackdriver.commonLabels" . | nindent 4 }} + {{- include "stackdriver.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/stackdriver/values.yaml b/charts/operator-wandb/charts/stackdriver/values.yaml new file mode 100644 index 000000000..4a248f93a --- /dev/null +++ b/charts/operator-wandb/charts/stackdriver/values.yaml @@ -0,0 +1,102 @@ +enabled: true + +nameOverride: "" +fullnameOverride: "" + +image: + repository: prometheuscommunity/stackdriver-exporter + pullPolicy: IfNotPresent + # Image tag to deploy. The deployment template uses this value as-is; it does not fall back to the chart appVersion. + tag: latest + # pullSecrets: [] + +# Tolerations for pod scheduling +tolerations: [] + +restartPolicy: Always +replicaCount: 1 + +extraEnv: {} + +extraEnvFrom: {} + +extraArgs: {} + +stackdriver: + # The Google Project ID to gather metrics for + projectId: "FALSE" + # An existing secret which contains credentials.json + serviceAccountSecret: "" + # Provide custom key for the existing secret to load credentials.json from + serviceAccountSecretKey: "" + # A service account key JSON file.
Must be provided when no existing secret is used. NOTE(review): the stackdriver chart added by this patch contains no Secret template, so nothing visible here creates a secret from this value - confirm where it is provisioned + serviceAccountKey: "" + # Max number of retries that should be attempted on 503 errors from Stackdriver + maxRetries: 0 + # How long should Stackdriver_exporter wait for a result from the Stackdriver API + httpTimeout: 10s + # Max time between each request in an exp backoff scenario + maxBackoff: 5s + # The amount of jitter to introduce in an exp backoff scenario + backoffJitter: 1s + # The HTTP statuses that should trigger a retry + retryStatuses: 503 + # Drop metrics from attached projects and fetch `project_id` only + dropDelegatedProjects: false + metrics: + # Comma-separated metric type prefixes to gather; the default below selects the `cloudsql.googleapis.com/database` and `redis` prefixes. + typePrefixes: 'cloudsql.googleapis.com/database,redis' + # The filters to refine the metrics query by using Filter objects that Google provides. + # Filter objects: project, group.id, resource.type, resource.labels.[KEY], metric.type, metric.labels.[KEY] + # https://cloud.google.com/monitoring/api/v3/filters + filters: [] + # - 'pubsub.googleapis.com/subscription:resource.labels.subscription_id=monitoring.regex.full_match("us-west4.*my-team.*")' + # The frequency to request + interval: '5m' + # How far into the past to offset + offset: '0s' + # Offset for the Google Stackdriver Monitoring Metrics interval into the past by the ingest delay from the metric's metadata. + ingestDelay: false + # If enabled will treat all DELTA metrics as an in-memory counter instead of a gauge. + aggregateDeltas: false + # How long should a delta metric continue to be exported after GCP stops producing a metric + aggregateDeltasTTL: '30m' + +web: + # Port to listen on + listenAddress: ':9255' + # Path under which to expose metrics.
+ path: /metrics + +secret: + labels: {} +customLabels: {} + # app: prometheus-stackdriver-exporter + +service: + type: ClusterIP + httpPort: 9255 + annotations: {} + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + +deployment: {} + +serviceAccount: + create: true + +common: + labels: {} + annotations: {} + +resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 500m + memory: 500Mi \ No newline at end of file diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 18046f719..6c56efe09 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -198,6 +198,15 @@ prometheus: mysql-exporter: install: true +stackdriver: + install: true + pod: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9255" + prometheus.io/path: "/metrics" + prometheus.io/scheme: http + otel: install: true From edbc56c7de09ccc4afc54482e665305f0bd46943 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Mon, 10 Jun 2024 14:07:27 -0700 Subject: [PATCH 12/16] fix: Templatize http proxy container (#145) --- charts/operator/Chart.yaml | 2 +- charts/operator/templates/deployment.yaml | 2 +- charts/operator/values.yaml | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/charts/operator/Chart.yaml b/charts/operator/Chart.yaml index a39464633..73a9dad16 100644 --- a/charts/operator/Chart.yaml +++ b/charts/operator/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator description: A Helm chart for Weights & Biases operator type: application -version: 1.1.3 +version: 1.1.4 appVersion: "1.0.0" maintainers: - name: wandb diff --git a/charts/operator/templates/deployment.yaml b/charts/operator/templates/deployment.yaml index 41ea132b0..d72ba7b49 100644 --- a/charts/operator/templates/deployment.yaml +++ b/charts/operator/templates/deployment.yaml @@ -67,7 +67,7 @@ spec: - 
--upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=10 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.5.0 + image: {{ .Values.rbacProxy.image.repository }}:{{ .Values.rbacProxy.image.tag }} name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/charts/operator/values.yaml b/charts/operator/values.yaml index 3748f2cc2..30e80bfc5 100644 --- a/charts/operator/values.yaml +++ b/charts/operator/values.yaml @@ -33,4 +33,9 @@ clusterRole: resources: ["*"] verbs: ["*"] +rbacProxy: + image: + repository: gcr.io/kubebuilder/kube-rbac-proxy + tag: v0.5.0 + airgapped: false From 55d18d98e211db8cf2a757b305921ea5a769812d Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Mon, 17 Jun 2024 09:51:36 -0700 Subject: [PATCH 13/16] fix: Remove empty attributes (#143) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/otel/charts/agent/templates/_config.tpl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 6dcf845f1..7c58dfc59 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.10 +version: 0.13.11 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl b/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl index 9c7dea59d..c6ee0876a 100644 --- a/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl +++ b/charts/operator-wandb/charts/otel/charts/agent/templates/_config.tpl @@ -48,7 +48,6 @@ extensions: {{- define "otelAgent.processors" -}} processors: batch: {} - attributes: {} memory_limiter: check_interval: 5s limit_percentage: 80 From 836d3d98c61c97a1599545b396afb2b679ec3747 Mon Sep 17 00:00:00 2001 From: KyleGoyette Date: Mon, 17 Jun 2024 
10:02:29 -0700 Subject: [PATCH 14/16] chore(launch): Bump agent image version (#147) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 1049e168e..2df6c49ae 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.4 +version: 0.13.5 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 529a6789b..52b0a8c5f 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -5,7 +5,7 @@ agent: # Providing API key can be done external to this chart useExternalWandbSecret: false # Container image to use for the agent. - image: wandb/launch-agent:0.16.6 + image: wandb/launch-agent:0.17.1 # Image pull policy for agent image. imagePullPolicy: Always # Resources block for the agent spec. 
From 94f0b493953362a92fb9139e31ea61f969e43e35 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:11:05 -0700 Subject: [PATCH 15/16] chore: Default mysql to false (#149) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 7c58dfc59..bed259c45 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.11 +version: 0.13.12 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 6c56efe09..a56878552 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -167,7 +167,7 @@ flat-run-fields-updater: tag: latest mysql: - install: true + install: false persistence: size: 20Gi storageClass: "" From 94483a7dd14d60e570dc4852b7af9abe538a0079 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:08:48 -0700 Subject: [PATCH 16/16] fix: Pass all oidc vars (#150) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/app/templates/deployment.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index bed259c45..547b7147a 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.12 +version: 0.13.13 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git 
a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index 3e66dc743..c543ecccc 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -158,9 +158,11 @@ spec: - name: OIDC_CLIENT_ID value: {{ .Values.global.auth.oidc.clientId }} - name: OIDC_AUTH_METHOD - value: {{ .Values.global.auth.oidc.method }} + value: {{ .Values.global.auth.oidc.authMethod }} - name: OIDC_ISSUER value: {{ .Values.global.auth.oidc.issuer }} + - name: OIDC_CLIENT_SECRET + value: {{ .Values.global.auth.oidc.secret }} {{- end }} - name: GORILLA_SESSION_LENGTH