From b5c6eb6b51f8241edd893e6ddc4e74e05d8b2f89 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Tue, 25 Jun 2024 04:51:34 +0530 Subject: [PATCH 01/28] feat: Added yace support for aws rds and redis metrics (#140) Co-authored-by: Aditya Choudhari Co-authored-by: amanpruthi --- charts/operator-wandb/Chart.lock | 7 +- charts/operator-wandb/Chart.yaml | 6 +- charts/operator-wandb/charts/yace/.helmignore | 23 ++++ charts/operator-wandb/charts/yace/Chart.yaml | 5 + .../charts/yace/templates/_helpers.tpl | 102 +++++++++++++++++ .../charts/yace/templates/configmap.yaml | 18 +++ .../charts/yace/templates/deployment.yaml | 89 +++++++++++++++ .../charts/yace/templates/service.yaml | 23 ++++ .../charts/yace/templates/serviceaccount.yaml | 15 +++ charts/operator-wandb/charts/yace/values.yaml | 105 ++++++++++++++++++ charts/operator-wandb/values.yaml | 9 ++ 11 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 charts/operator-wandb/charts/yace/.helmignore create mode 100644 charts/operator-wandb/charts/yace/Chart.yaml create mode 100644 charts/operator-wandb/charts/yace/templates/_helpers.tpl create mode 100644 charts/operator-wandb/charts/yace/templates/configmap.yaml create mode 100644 charts/operator-wandb/charts/yace/templates/deployment.yaml create mode 100644 charts/operator-wandb/charts/yace/templates/service.yaml create mode 100644 charts/operator-wandb/charts/yace/templates/serviceaccount.yaml create mode 100644 charts/operator-wandb/charts/yace/values.yaml diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index 6cc7c094..fd1f5101 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -35,5 +35,8 @@ dependencies: - name: stackdriver repository: file://charts/stackdriver version: 0.1.0 -digest: sha256:9a6c69506deb6969686d5b220a0692b53cfa29642e059bdf27c440c5d7086bdb -generated: "2024-06-05T11:04:02.508473-07:00" +- name: yace + repository: file://charts/yace + version: 0.1.0 +digest: 
sha256:c12c533a22b6f593a526e3060597ee1591bf5a2cac4ff03c588758e0dbc65d1e +generated: "2024-06-24T16:15:00.442236-07:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 547b7147..09d5a7f4 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.13 +version: 0.14.1 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -60,3 +60,7 @@ dependencies: version: "*.*.*" repository: file://charts/stackdriver condition: stackdriver.install + - name: yace + version: "*.*.*" + repository: file://charts/yace + condition: yace.install diff --git a/charts/operator-wandb/charts/yace/.helmignore b/charts/operator-wandb/charts/yace/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/yace/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/yace/Chart.yaml b/charts/operator-wandb/charts/yace/Chart.yaml new file mode 100644 index 00000000..12bf5488 --- /dev/null +++ b/charts/operator-wandb/charts/yace/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: yace +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "v0.60.0" diff --git a/charts/operator-wandb/charts/yace/templates/_helpers.tpl b/charts/operator-wandb/charts/yace/templates/_helpers.tpl new file mode 100644 index 00000000..351bdda5 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/_helpers.tpl @@ -0,0 +1,102 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "yace.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "yace.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "yace.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "yace.labels" -}} +helm.sh/chart: {{ include "yace.chart" . 
}} +{{ include "yace.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "yace.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "yace.selectorLabels" -}} +app.kubernetes.io/name: {{ include "yace.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "yace.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "yace.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. +*/}} +{{- define "yace.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "yace.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. 
+*/}} +{{- define "yace.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} + diff --git a/charts/operator-wandb/charts/yace/templates/configmap.yaml b/charts/operator-wandb/charts/yace/templates/configmap.yaml new file mode 100644 index 00000000..42fd5728 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- if .Values.configMap.labels -}} + {{- toYaml .Values.configMap.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.configMap.annotations -}} + {{- toYaml .Values.configMap.annotations | nindent 4 }} + {{- end }} +data: + config.yml: | +{{- (tpl .Values.config $) | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/deployment.yaml b/charts/operator-wandb/charts/yace/templates/deployment.yaml new file mode 100644 index 00000000..3eb65681 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/deployment.yaml @@ -0,0 +1,89 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "yace.labels" . 
| nindent 6 }} + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "yace.commonLabels" . | nindent 8 }} + {{- include "yace.podLabels" . | nindent 8 }} + {{- include "yace.labels" . | nindent 8 }} + annotations: + checksum/configmap: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum | trunc 63 }} + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "yace.serviceAccountName" . }} + {{- if .tolerations }} + tolerations: + {{- toYaml .tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - yace + - --config.file=/config/config.yml + - --scraping-interval=60 + ports: + - containerPort: 5000 + name: http + protocol: TCP + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - mountPath: /config + name: yace-config + volumes: + - name: config + configMap: + defaultMode: 420 + name: {{ include "yace.fullname" . }} + volumes: + - configMap: + defaultMode: 420 + name: {{ include "yace.fullname" . 
}} + name: yace-config +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/service.yaml b/charts/operator-wandb/charts/yace/templates/service.yaml new file mode 100644 index 00000000..268d9b22 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 5000 + protocol: TCP + name: yace + selector: + {{- include "yace.labels" . | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml b/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml new file mode 100644 index 00000000..69eddbba --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "yace.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . 
| nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/values.yaml b/charts/operator-wandb/charts/yace/values.yaml new file mode 100644 index 00000000..e716a19c --- /dev/null +++ b/charts/operator-wandb/charts/yace/values.yaml @@ -0,0 +1,105 @@ +enabled: true + +nameOverride: "" +fullnameOverride: "" + +image: + registry: ghcr.io + repository: nerdswords/yet-another-cloudwatch-exporter + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: v0.60.0 + +# Tolerations for pod scheduling +tolerations: [] + +extraEnv: {} +extraEnvFrom: {} + + +config: |- + apiVersion: v1alpha1 + discovery: + jobs: + - type: AWS/ElastiCache + regions: + {{- range .Values.regions }} + - {{ . }} + {{- end }} + period: 60 + length: 60 + metrics: + - name: CPUUtilization + statistics: [Average] + - name: FreeableMemory + statistics: [Average] + - name: NetworkBytesIn + statistics: [Average] + - name: NetworkBytesOut + statistics: [Average] + - name: NetworkPacketsIn + statistics: [Average] + - name: NetworkPacketsOut + statistics: [Average] + - name: SwapUsage + statistics: [Average] + - name: CPUCreditUsage + statistics: [Average] + - type: AWS/RDS + regions: + {{- range .Values.regions }} + - {{ . 
}} + {{- end }} + period: 60 + length: 60 + metrics: + - name: CPUUtilization + statistics: [Maximum] + - name: DatabaseConnections + statistics: [Sum] + - name: FreeableMemory + statistics: [Average] + - name: FreeStorageSpace + statistics: [Average] + - name: ReadThroughput + statistics: [Average] + - name: WriteThroughput + statistics: [Average] + - name: ReadLatency + statistics: [Maximum] + - name: WriteLatency + statistics: [Maximum] + - name: ReadIOPS + statistics: [Average] + - name: WriteIOPS + statistics: [Average] +service: + type: ClusterIP + annotations: {} + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + +deployment: {} + +serviceAccount: + create: true + +common: + labels: {} + annotations: {} + +configMap: + labels: {} + annotations: {} + +resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 500m + memory: 500Mi diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index a5687855..0bb71fbf 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -171,6 +171,15 @@ mysql: persistence: size: 20Gi storageClass: "" +yace: + install: true + regions: ["ap-south-1"] + pod: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5000" + prometheus.io/path: "/metrics" + prometheus.io/scheme: http redis: install: true From 97180ef7dc752093dad95ef85a385c2d554b3f6d Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 27 Jun 2024 13:47:21 -0700 Subject: [PATCH 02/28] chore(launch-agent): bump agent image to 0.17.2 (#154) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 2df6c49a..22966ef1 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: 
https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.5 +version: 0.13.6 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 52b0a8c5..8926d929 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -5,7 +5,7 @@ agent: # Providing API key can be done external to this chart useExternalWandbSecret: false # Container image to use for the agent. - image: wandb/launch-agent:0.17.1 + image: wandb/launch-agent:0.17.2 # Image pull policy for agent image. imagePullPolicy: Always # Resources block for the agent spec. From 2b68caec2f738aaccb13924a95c5de3951fcb2d1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 27 Jun 2024 14:22:34 -0700 Subject: [PATCH 03/28] chore(launch-agent): bump agent version to 0.17.3 (#155) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 22966ef1..2310a344 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.6 +version: 0.13.7 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 8926d929..36af11e9 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -5,7 +5,7 @@ agent: # Providing API key can be done external to this chart useExternalWandbSecret: false # Container image to use for the agent. - image: wandb/launch-agent:0.17.2 + image: wandb/launch-agent:0.17.3 # Image pull policy for agent image. 
imagePullPolicy: Always # Resources block for the agent spec. From c4a125d4680b1b0ea1710b96df4013d143d4facf Mon Sep 17 00:00:00 2001 From: KyleGoyette Date: Mon, 1 Jul 2024 13:10:23 -0700 Subject: [PATCH 04/28] chore(launch): Remove team mates that are no longer on the launch team from code owners for launch agent (#148) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fb0114ee..a5922f0d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,2 @@ * @gls4 @jsbroks @nfoucha @vanpelt -/charts/launch-agent/ @bcsherma @gtarpenning @KyleGoyette @nickpenaranda @TimH98 @wandb-zacharyblasczyk \ No newline at end of file +/charts/launch-agent/ @bcsherma @KyleGoyette @TimH98 \ No newline at end of file From 91ad9f52f78ddf4f6c660337e81ab684c615717f Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:27:37 -0700 Subject: [PATCH 05/28] fix: Default metrics moniotors to false (#152) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/yace/templates/deployment.yaml | 2 +- charts/operator-wandb/values.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 09d5a7f4..0a92d82b 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.1 +version: 0.14.2 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/yace/templates/deployment.yaml b/charts/operator-wandb/charts/yace/templates/deployment.yaml index 3eb65681..4534f6c0 100644 --- a/charts/operator-wandb/charts/yace/templates/deployment.yaml +++ b/charts/operator-wandb/charts/yace/templates/deployment.yaml @@ -44,7 +44,7 @@ spec: {{- include 
"wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" command: - yace - --config.file=/config/config.yml diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 0bb71fbf..d91f233a 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -172,7 +172,7 @@ mysql: size: 20Gi storageClass: "" yace: - install: true + install: false regions: ["ap-south-1"] pod: annotations: @@ -208,7 +208,7 @@ prometheus: install: true stackdriver: - install: true + install: false pod: annotations: prometheus.io/scrape: "true" From c426f576777ee177de596c875518461bea2b6afe Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:24:22 -0700 Subject: [PATCH 06/28] fix: Mount redis cert to parquet cron job (#158) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/parquet/templates/cron.yaml | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 0a92d82b..17c11e37 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.2 +version: 0.14.3 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 4d082e29..6c35a8f5 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -49,6 +49,12 @@ spec: "megabinary", "glue", ] + volumeMounts: + {{- if ne (include "wandb.redis.caCert" .) 
"" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + mountPath: /etc/ssl/certs/redis_ca.pem + subPath: redis_ca.pem + {{- end }} env: - name: GORILLA_GLUE_EXECUTE value: "true" @@ -130,5 +136,14 @@ spec: fieldPath: status.hostIP {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} + volumes: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + secret: + secretName: "{{ include "wandb.redis.passwordSecret" . }}" + items: + - key: REDIS_CA_CERT + path: redis_ca.pem + {{- end }} restartPolicy: Never {{- end }} From dc8dd9b2fb791be5ef62aa255d4ad0bda6fcaf7c Mon Sep 17 00:00:00 2001 From: Josiah Lee Date: Tue, 2 Jul 2024 15:58:32 -0700 Subject: [PATCH 07/28] chore(weave): add weave public url (#161) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/weave/templates/deployment.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 17c11e37..b72b8579 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.3 +version: 0.14.4 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index a289ca9e..bfc3239c 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -54,13 +54,15 @@ spec: - name: ONLY_SERVICE value: weave - name: WANDB_BASE_URL + value: http://{{ include "weave.appFullname" . 
}}:8080 + - name: WANDB_PUBLIC_BASE_URL value: {{ .Values.global.host }} - name: WEAVE_LOG_FORMAT value: json - name: WEAVE_LOCAL_ARTIFACT_DIR value: /vol/weave/cache - name: WEAVE_AUTH_GRAPHQL_URL - value: http://{{ include "weave.appFullname" . }}.{{ $.Release.Namespace }}.svc.{{ .Values.app.clusterDomain }}:8080/graphql + value: http://{{ include "weave.appFullname" . }}:8080/graphql - name: WEAVE_SERVER_NUM_WORKERS value: "4" From 957ee870840c11d09bbd04754bd8af758553fa00 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 3 Jul 2024 09:59:54 -0700 Subject: [PATCH 08/28] fix(launch-agent): mount git creds as a subPath (#162) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/templates/deployment.yaml | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 2310a344..c39f34bf 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.7 +version: 0.13.8 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 15d1e754..4395f9be 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -106,7 +106,12 @@ spec: readOnly: true {{ if .Values.gitCreds}} - name: git-creds - mountPath: /home/launch_agent/ + mountPath: /home/launch_agent/.gitconfig + subPath: .gitconfig + readOnly: true + - name: git-creds + mountPath: /home/launch_agent/.git-credentials + subPath: .git-credentials readOnly: true {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} @@ -127,9 +132,6 @@ spec: - name: git-creds secret: secretName: git-creds - - name: 
git-config - secret: - secretName: git-config {{ end}} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} - name: custom-cabundle From 33572b1169d3b32e423298e72d4d831fc953c0bb Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Wed, 10 Jul 2024 09:03:33 -0700 Subject: [PATCH 09/28] chore(dev): redis certificate for flat run fields updater (#165) --- charts/operator-wandb/Chart.yaml | 2 +- .../flat-run-fields-updater/templates/deployment.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index b72b8579..355da233 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.4 +version: 0.14.5 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index e08f15de..a0a2bb73 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -51,6 +51,12 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "flat-run-fields-updater.fullname" . 
}}-redis-ca + mountPath: /etc/ssl/certs/redis_ca.pem + subPath: redis_ca.pem + {{- end }} env: - name: POD_NAME valueFrom: From 4fd8a139c165490e8650388fc115f1fec35eca87 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Wed, 10 Jul 2024 14:12:14 -0700 Subject: [PATCH 10/28] chore(dev): fix flat run fields updater redis volume (#167) --- charts/operator-wandb/Chart.yaml | 2 +- .../flat-run-fields-updater/templates/deployment.yaml | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 355da233..c0225a18 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.5 +version: 0.14.6 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index a0a2bb73..b82fe272 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -163,4 +163,13 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} + volumes: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "flat-run-fields-updater.fullname" . }}-redis-ca + secret: + secretName: "{{ include "wandb.redis.passwordSecret" . 
}}" + items: + - key: REDIS_CA_CERT + path: redis_ca.pem + {{- end }} {{- end }} \ No newline at end of file From 23c026b74eee4f412696502dccce98ff2ef1e962 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Wed, 10 Jul 2024 16:13:08 -0700 Subject: [PATCH 11/28] chore(dev): make kafka partitions configurable (#164) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/app/templates/deployment.yaml | 4 +++- .../flat-run-fields-updater/templates/deployment.yaml | 4 +++- charts/operator-wandb/templates/_kafka.tpl | 7 +++++++ charts/operator-wandb/values.yaml | 3 +++ 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index c0225a18..7b03050f 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.6 +version: 0.14.7 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index c543eccc..b708897b 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -227,6 +227,8 @@ spec: key: KAFKA_CLIENT_PASSWORD - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} + - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS + value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} - name: OVERFLOW_BUCKET_ADDR value: "{{ include "app.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE @@ -237,7 +239,7 @@ spec: "name": "wandb", "prefix": "wandb-overflow" }, - "addr": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@$(KAFKA_BROKER_HOST):$(KAFKA_BROKER_PORT)/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?producer_batch_bytes=1048576" + "addr": 
"kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@$(KAFKA_BROKER_HOST):$(KAFKA_BROKER_PORT)/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?producer_batch_bytes=1048576&num_partitions=$(KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS)" } {{- include "app.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 12 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 12 }} diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index b82fe272..3f4044d6 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -114,6 +114,8 @@ spec: key: KAFKA_CLIENT_PASSWORD - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} + - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS + value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE @@ -125,7 +127,7 @@ spec: "prefix": "wandb-overflow" }, "subscriptions": { - "flatRunFieldsUpdater": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@wandb-kafka:9092/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?consumer_group_id=default-group" + "flatRunFieldsUpdater": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@wandb-kafka:9092/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?consumer_group_id=default-group&num_partitions=$(KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS)" } } {{- if ne (include "wandb.redis.password" .) 
"" }} diff --git a/charts/operator-wandb/templates/_kafka.tpl b/charts/operator-wandb/templates/_kafka.tpl index 5c81299d..d53f45e6 100644 --- a/charts/operator-wandb/templates/_kafka.tpl +++ b/charts/operator-wandb/templates/_kafka.tpl @@ -59,3 +59,10 @@ Return the kafka topic name for run-updates-shadow {{- define "wandb.kafka.runUpdatesShadowTopic" -}} {{ printf "%s-%s" .Release.Name "run-updates-shadow" | trunc 63 | trimSuffix "-" }} {{- end -}} + +{{/* +Return the number of partitions for run-updates-shadow +*/}} +{{- define "wandb.kafka.runUpdatesShadowNumPartitions" -}} +{{- print .Values.global.kafka.runUpdatesShadowNumPartitions -}} +{{- end -}} diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index d91f233a..718388fe 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -101,6 +101,9 @@ global: brokerHost: "" brokerPort: 9092 runUpdatesShadowTopic: "" + # This value will only apply upon initial topic creation. + # If the topic already exists then changing the number of partitions is not possible. 
+ runUpdatesShadowNumPartitions: 1 ingress: install: true From 1fb71bf18a5e8984c91c46d0942528063f743970 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Wed, 10 Jul 2024 16:31:47 -0700 Subject: [PATCH 12/28] chore(dev): fix frfu num partitions (#168) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/flat-run-fields-updater/templates/deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 7b03050f..9f1b683f 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.7 +version: 0.14.8 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index 3f4044d6..fc9519eb 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -115,7 +115,7 @@ spec: - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS - value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} + value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE From c6afe086f3859a411242025cbf5f5037f5efcfc6 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Thu, 11 Jul 2024 07:29:18 -0700 Subject: [PATCH 13/28] fix: add hc annotation to the console service in AWS clusters (#163) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/console/templates/service.yaml | 5 ++++- charts/operator-wandb/values.yaml | 2 
++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 9f1b683f..0b43b7d4 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.8 +version: 0.14.9 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/console/templates/service.yaml b/charts/operator-wandb/charts/console/templates/service.yaml index 71c3a6a8..b4a4ab12 100644 --- a/charts/operator-wandb/charts/console/templates/service.yaml +++ b/charts/operator-wandb/charts/console/templates/service.yaml @@ -11,7 +11,10 @@ metadata: {{- toYaml .Values.service.labels | nindent 4 }} {{- end }} annotations: - {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if eq .Values.global.cloudProvider "aws" }} + alb.ingress.kubernetes.io/healthcheck-path: /console/api/ready + {{- end }} + {{- include "wandb.serviceAnnotations" $ | nindent 4 }} {{- if .Values.service.annotations -}} {{- toYaml .Values.service.annotations | nindent 4 }} {{- end }} diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 718388fe..39fc0c97 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -14,6 +14,8 @@ global: host: "http://localhost:8080" license: "" + cloudProvider: "" + storageClass: "" banners: From 4a08c85e3994136ac12f099f4ecfb92fe0b56491 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Thu, 11 Jul 2024 12:55:46 -0700 Subject: [PATCH 14/28] chore(dev): make frfu value a string (#169) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/flat-run-fields-updater/templates/deployment.yaml | 2 +- charts/operator-wandb/values.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml 
index 0b43b7d4..bbc7c507 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.9 +version: 0.14.10 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index fc9519eb..3f4044d6 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -115,7 +115,7 @@ spec: - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS - value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" + value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 39fc0c97..0e14e315 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -105,7 +105,7 @@ global: runUpdatesShadowTopic: "" # This value will only apply upon initial topic creation. # If the topic already exists then changing the number of partitions is not possible. 
- runUpdatesShadowNumPartitions: 1 + runUpdatesShadowNumPartitions: "1" ingress: install: true From 775e828d844dc86429aabf7d2a7b3d88649e4675 Mon Sep 17 00:00:00 2001 From: KyleGoyette Date: Thu, 11 Jul 2024 13:39:55 -0700 Subject: [PATCH 15/28] Support git creds secret (#170) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/README.md | 1 + charts/launch-agent/templates/configmap.yaml | 19 +++++- charts/launch-agent/templates/deployment.yaml | 59 +++++++++++++++---- charts/launch-agent/values.yaml | 8 +++ 5 files changed, 77 insertions(+), 12 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index c39f34bf..a12f65b3 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.8 +version: 0.13.9 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/README.md b/charts/launch-agent/README.md index cb3c46f4..bc270908 100644 --- a/charts/launch-agent/README.md +++ b/charts/launch-agent/README.md @@ -57,6 +57,7 @@ The table below describes all the available variables in the chart: | `launchConfig` | mutiline string | **Yes** | `null` | This should be set to the literal contents of your launch agent config. See the agent setup docs for details: https://docs.wandb.ai/guides/launch/setup-agent-advanced | | `volcano` | bool | No | `true` | Controls whether the volcano scheduler should be installed in your cluster along with the agent. Set to `false` to disable volcano installation. | | `gitCreds` | mutiline string | No | `null` | Contents of a git credentials file. | +| `sshAuthSecrets` | list(object) | No | `[]` | Name of secret containing an ssh-auth kubernetes secret and the associated host for the ssh key. 
| | `serviceAccount.annotations` | object | No | `null` | Annotations for the wandb service account. | | `azureStorageAccessKey` | string | No | "" | Azure storage access key required for kaniko to acces build contexts in azure blob storage. | | `additionalEnvVars` | map(string) | No | {} | Map with environment variables to be set in the Launch Agent pod. | diff --git a/charts/launch-agent/templates/configmap.yaml b/charts/launch-agent/templates/configmap.yaml index 6f5d3224..7af1d06f 100644 --- a/charts/launch-agent/templates/configmap.yaml +++ b/charts/launch-agent/templates/configmap.yaml @@ -8,4 +8,21 @@ kind: ConfigMap metadata: name: wandb-launch-configmap-{{ .Release.Name }} namespace: {{ .Values.namespace }} -... \ No newline at end of file +--- +{{- if .Values.sshAuthSecrets }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ssh-config-{{ .Release.Name }} + namespace: {{ .Values.namespace }} +data: + config: | + {{- range $index, $secret := .Values.sshAuthSecrets }} + Host {{ .host }} + IdentityFile /home/launch_agent/.ssh/id_repo{{ $index }} + IdentitiesOnly yes + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + {{- end }} +{{- end }} diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 4395f9be..19312367 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -18,13 +18,23 @@ spec: metadata: labels: app: launch-agent-{{ .Release.Name }} -{{- if .Values.agent.labels }} -{{- toYaml .Values.agent.labels | trim | nindent 8 }} -{{- end }} + {{- if .Values.agent.labels }} + {{- toYaml .Values.agent.labels | trim | nindent 8 }} + {{- end }} spec: serviceAccountName: wandb-launch-serviceaccount-{{ .Release.Name }} - {{- if .Values.kanikoPvcName }} + {{- if or .Values.sshAuthSecrets .Values.kanikoPvcName }} initContainers: + {{- end}} + {{- if .Values.sshAuthSecrets }} + - name: init-create-ssh-dir + image: {{ .Values.agent.image 
}} + command: ["sh", "-c", "mkdir -p /home/launch_agent/.ssh"] + volumeMounts: + - name: ssh-dir + mountPath: /home/launch_agent/.ssh + {{- end }} + {{- if .Values.kanikoPvcName }} - name: kaniko-volume-chown image: {{ .Values.agent.image }} command: ["sh", "-c"] @@ -114,6 +124,18 @@ spec: subPath: .git-credentials readOnly: true {{ end }} + {{ if .Values.sshAuthSecrets }} + {{- range $index, $secret := .Values.sshAuthSecrets }} + - name: git-ssh-key-secret-{{ $index }} + mountPath: /home/launch_agent/.ssh/id_repo{{ $index }} + subPath: id_repo{{ $index }} + readOnly: true + {{- end }} + - name: ssh-config + mountPath: /home/launch_agent/.ssh/config + subPath: config + readOnly: true + {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} - name: custom-cabundle mountPath: /usr/local/share/ca-certificates/custom-ca.crt @@ -124,15 +146,32 @@ spec: - name: kaniko-pvc mountPath: /home/launch_agent/kaniko {{ end }} + {{- if .Values.sshAuthSecrets }} + - name: ssh-dir + mountPath: /home/launch_agent/.ssh + {{ end }} volumes: - name: wandb-launch-config configMap: name: wandb-launch-configmap-{{ .Release.Name }} - {{ if .Values.gitCreds}} - - name: git-creds + {{ if .Values.sshAuthSecrets }} + {{- range $index, $secret := .Values.sshAuthSecrets }} + - name: git-ssh-key-secret-{{ $index }} secret: - secretName: git-creds - {{ end}} + secretName: {{ $secret.name }} + items: + - key: ssh-privatekey + path: id_repo{{ $index }} + {{- end }} + - name: ssh-config + configMap: + name: ssh-config-{{ .Release.Name }} + items: + - key: config + path: config + - name: ssh-dir + emptyDir: {} + {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} - name: custom-cabundle configMap: @@ -140,12 +179,12 @@ spec: items: - key: {{ .Values.customCABundle.configMap.key }} path: custom-ca.crt - {{- end}} + {{- end }} {{ if .Values.kanikoPvcName }} - name: kaniko-pvc persistentVolumeClaim: claimName: {{ 
.Values.kanikoPvcName }} - {{- end}} + {{- end }} nodeSelector: {{- toYaml .Values.agent.nodeSelector | nindent 8 }} tolerations: diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 36af11e9..03fc6d1f 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -63,6 +63,14 @@ additionalSecretEnvVars: # repos. Example: https://username:password@example.com gitCreds: | +# list of secrets for the agent to use for ssh auth +# format is a list of secret names and hosts secrets +# should be created ass ssh-auth secrets, +# see: https://kubernetes.io/docs/concepts/configuration/secret/#ssh-authentication-secrets +sshAuthSecrets: + # - name: secret-name + # host: example.com + # Annotations for the wandb service account. Useful when setting up workload identity on gcp. serviceAccount: annotations: From a8c97a2ae6e3674ba37873aaacf58b24306790cd Mon Sep 17 00:00:00 2001 From: KyleGoyette Date: Thu, 11 Jul 2024 15:04:44 -0700 Subject: [PATCH 16/28] fix git creds for launch-agent (#172) --- charts/launch-agent/Chart.yaml | 2 +- charts/launch-agent/templates/deployment.yaml | 15 +++++++++------ charts/launch-agent/values.yaml | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index a12f65b3..b136846d 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.9 +version: 0.13.10 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 19312367..851a051a 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -111,6 +111,8 @@ spec: value: {{ 
.Values.kanikoDockerConfigSecret }} {{- end }} volumeMounts: + - name: ssh-dir + mountPath: /home/launch_agent/.ssh - name: wandb-launch-config mountPath: /home/launch_agent/.config/wandb readOnly: true @@ -146,14 +148,17 @@ spec: - name: kaniko-pvc mountPath: /home/launch_agent/kaniko {{ end }} - {{- if .Values.sshAuthSecrets }} - - name: ssh-dir - mountPath: /home/launch_agent/.ssh - {{ end }} volumes: + - name: ssh-dir + emptyDir: {} - name: wandb-launch-config configMap: name: wandb-launch-configmap-{{ .Release.Name }} + {{ if .Values.gitCreds}} + - name: git-creds + secret: + secretName: git-creds + {{ end}} {{ if .Values.sshAuthSecrets }} {{- range $index, $secret := .Values.sshAuthSecrets }} - name: git-ssh-key-secret-{{ $index }} @@ -169,8 +174,6 @@ spec: items: - key: config path: config - - name: ssh-dir - emptyDir: {} {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} - name: custom-cabundle diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 03fc6d1f..5872081e 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -65,7 +65,7 @@ gitCreds: | # list of secrets for the agent to use for ssh auth # format is a list of secret names and hosts secrets -# should be created ass ssh-auth secrets, +# should be created ass ssh-auth secrets, # see: https://kubernetes.io/docs/concepts/configuration/secret/#ssh-authentication-secrets sshAuthSecrets: # - name: secret-name From 4ce5d022f6f8806ce24a26304ecb8cc6913a2907 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Fri, 12 Jul 2024 11:40:03 -0700 Subject: [PATCH 17/28] chore(dev): kafka consumer terminationGracePeriodSeconds (#171) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/flat-run-fields-updater/templates/deployment.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index bbc7c507..0b92ce18 100644 --- 
a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.10 +version: 0.14.11 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index 3f4044d6..03a526bd 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -45,6 +45,8 @@ spec: {{- include "wandb.nodeSelector" . | nindent 6 }} {{- include "wandb.priorityClassName" . | nindent 6 }} {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + # needed to ensure ensure Kafka consumers handle pod termination gracefully and avoid data loss + terminationGracePeriodSeconds: 60 containers: - name: {{ .Chart.Name }} securityContext: From ff1010c43ba4d69b2e0d31290aa5ed0a9c2b861b Mon Sep 17 00:00:00 2001 From: mkaesz-wandb <165807238+mkaesz-wandb@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:07:46 +0200 Subject: [PATCH 18/28] fix: added missing mysql port variable to init container (#146) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/app/templates/deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 0b92ce18..c67477e0 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.11 +version: 0.14.12 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml 
b/charts/operator-wandb/charts/app/templates/deployment.yaml index b708897b..2cf9172b 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -63,7 +63,7 @@ spec: secretKeyRef: name: {{ include "wandb.mysql.passwordSecret" . }} key: MYSQL_PASSWORD - command: ['bash', '-c', "until mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASSWORD -D$MYSQL_DATABASE --execute=\"SELECT 1\"; do echo waiting for db; sleep 2; done"] + command: ['bash', '-c', "until mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASSWORD -D$MYSQL_DATABASE -P$MYSQL_PORT --execute=\"SELECT 1\"; do echo waiting for db; sleep 2; done"] containers: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" From 2ec7b52feb5c3fa685b5f58b9d766ae9478e7e7e Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Tue, 16 Jul 2024 09:55:58 -0700 Subject: [PATCH 19/28] chore(dev): run updates shadow partitions (#175) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/app/templates/deployment.yaml | 2 +- .../charts/flat-run-fields-updater/templates/deployment.yaml | 2 +- charts/operator-wandb/values.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index c67477e0..5a27ff51 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.12 +version: 0.14.13 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index 2cf9172b..13d88a22 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -228,7 +228,7 @@ spec: - name: 
KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS - value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} + value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: OVERFLOW_BUCKET_ADDR value: "{{ include "app.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index 03a526bd..931851ef 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -117,7 +117,7 @@ spec: - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS - value: {{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}} + value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 0e14e315..39fc0c97 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -105,7 +105,7 @@ global: runUpdatesShadowTopic: "" # This value will only apply upon initial topic creation. # If the topic already exists then changing the number of partitions is not possible. 
- runUpdatesShadowNumPartitions: "1" + runUpdatesShadowNumPartitions: 1 ingress: install: true From 55dd849d53afb23009765edcfdcb869f942d3754 Mon Sep 17 00:00:00 2001 From: Jessica Xiang Date: Wed, 17 Jul 2024 09:07:18 -0700 Subject: [PATCH 20/28] chore(dev): add AWS_REGION to frfu deployment (#176) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/flat-run-fields-updater/templates/deployment.yaml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 5a27ff51..ae4ce516 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.13 +version: 0.14.14 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index 931851ef..01fe5040 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -120,6 +120,10 @@ spec: value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" + - name: AWS_REGION + value: {{ .Values.global.bucket.region }} + - name: AWS_S3_KMS_ID + value: "{{ .Values.global.bucket.kmsKey }}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE value: > { From 3a81fd5028ca3301faefd4b017e34e430918d625 Mon Sep 17 00:00:00 2001 From: Nick Penaranda Date: Wed, 17 Jul 2024 12:24:17 -0400 Subject: [PATCH 21/28] fix: HPA could not scale weave (#177) --- charts/operator-wandb/Chart.yaml | 2 +- .../operator-wandb/charts/weave/templates/deployment.yaml | 3 +++ charts/operator-wandb/charts/weave/values.yaml | 6 ++++++ 3 files changed, 10 insertions(+), 1 
deletion(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index ae4ce516..ec0c4d4a 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.14 +version: 0.14.15 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index bfc3239c..f459afc2 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -107,6 +107,9 @@ spec: - name: cache mountPath: /vol/weave/cache + resources: + {{- toYaml .Values.cacheClear.resources | nindent 12 }} + volumes: - name: cache emptyDir: diff --git a/charts/operator-wandb/charts/weave/values.yaml b/charts/operator-wandb/charts/weave/values.yaml index 542fd2c4..cb138c3b 100644 --- a/charts/operator-wandb/charts/weave/values.yaml +++ b/charts/operator-wandb/charts/weave/values.yaml @@ -57,3 +57,9 @@ cache: intervalInHours: 24 size: 20Gi medium: "" + +cacheClear: + resources: + requests: + cpu: 100m + memory: 128Mi \ No newline at end of file From 6a8df136066d848568b5e3bc7e73ffcc9b01bf21 Mon Sep 17 00:00:00 2001 From: Josiah Lee Date: Wed, 17 Jul 2024 09:39:29 -0700 Subject: [PATCH 22/28] chore(weave): fix weave cache import (#159) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/weave/templates/deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index ec0c4d4a..68c01749 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.15 
+version: 0.14.16 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index f459afc2..0999ca80 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -92,7 +92,7 @@ spec: - name: {{ include "weave.fullname" . }}-cache-clear image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - command: ["python", "-m", "weave.clear_cache"] + command: ["python", "-m", "weave.legacy.clear_cache"] env: - name: WEAVE_LOCAL_ARTIFACT_DIR From e521ab8ae474dbd92a14e5c73a378f29e05a5fc1 Mon Sep 17 00:00:00 2001 From: levinandrew Date: Thu, 18 Jul 2024 13:58:35 -0700 Subject: [PATCH 23/28] feat(backend): enable subpath in GCP (#173) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/app/templates/_helpers.tpl | 2 +- .../charts/flat-run-fields-updater/templates/_helpers.tpl | 2 +- charts/operator-wandb/charts/parquet/templates/_helpers.tpl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 68c01749..4e14c641 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.16 +version: 0.14.17 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/_helpers.tpl b/charts/operator-wandb/charts/app/templates/_helpers.tpl index 589cf9f0..b001fe0c 100644 --- a/charts/operator-wandb/charts/app/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/app/templates/_helpers.tpl @@ -116,7 +116,7 @@ app deployments. 
{{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl b/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl index a52b8aa3..61c54a41 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl @@ -112,7 +112,7 @@ Create the name of the service account to use {{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} diff --git a/charts/operator-wandb/charts/parquet/templates/_helpers.tpl b/charts/operator-wandb/charts/parquet/templates/_helpers.tpl index c2a077e3..32414268 100644 --- a/charts/operator-wandb/charts/parquet/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/parquet/templates/_helpers.tpl @@ -116,7 +116,7 @@ app deployments. 
{{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} From 314ff0e96b1b6b05fdc070365fe6edf33f297134 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Thu, 18 Jul 2024 14:43:49 -0700 Subject: [PATCH 24/28] fix: Normalize the service account create logic for all wandb applications (#178) --- charts/operator-wandb/Chart.yaml | 2 +- .../charts/app/templates/serviceaccount.yaml | 2 ++ charts/operator-wandb/charts/app/values.yaml | 1 + .../charts/console/templates/deployment.yaml | 2 +- .../console/templates/serviceaccount.yaml | 2 ++ .../operator-wandb/charts/console/values.yaml | 4 +++- .../templates/serviceaccount.yaml | 1 + .../charts/parquet/templates/deployment.yaml | 1 + .../parquet/templates/serviceaccount.yaml | 17 +++++++++++++++++ .../operator-wandb/charts/parquet/values.yaml | 4 +++- 10 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 4e14c641..15f5ee5e 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.14.17 +version: 0.15.0 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/serviceaccount.yaml b/charts/operator-wandb/charts/app/templates/serviceaccount.yaml index c3824d12..f0639978 100644 --- a/charts/operator-wandb/charts/app/templates/serviceaccount.yaml +++ 
b/charts/operator-wandb/charts/app/templates/serviceaccount.yaml @@ -1,3 +1,4 @@ +{{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: @@ -14,3 +15,4 @@ metadata: {{- if .Values.serviceAccount.annotations -}} {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} {{- end }} + {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/app/values.yaml b/charts/operator-wandb/charts/app/values.yaml index 9afff84c..ffc8d8fb 100644 --- a/charts/operator-wandb/charts/app/values.yaml +++ b/charts/operator-wandb/charts/app/values.yaml @@ -53,6 +53,7 @@ resources: serviceAccount: create: true + annotations: {} role: {} roleBinding: {} diff --git a/charts/operator-wandb/charts/console/templates/deployment.yaml b/charts/operator-wandb/charts/console/templates/deployment.yaml index 835a727d..54c983b4 100644 --- a/charts/operator-wandb/charts/console/templates/deployment.yaml +++ b/charts/operator-wandb/charts/console/templates/deployment.yaml @@ -34,7 +34,7 @@ spec: {{- toYaml .Values.pod.annotations | nindent 4 }} {{- end }} spec: - serviceAccountName: {{ include "console.fullname" . }} + serviceAccountName: {{ include "console.serviceAccountName" . 
}} {{- if .tolerations }} tolerations: {{- toYaml .tolerations | nindent 8 }} diff --git a/charts/operator-wandb/charts/console/templates/serviceaccount.yaml b/charts/operator-wandb/charts/console/templates/serviceaccount.yaml index 2eea38c2..3cda2894 100644 --- a/charts/operator-wandb/charts/console/templates/serviceaccount.yaml +++ b/charts/operator-wandb/charts/console/templates/serviceaccount.yaml @@ -1,3 +1,4 @@ +{{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: @@ -14,3 +15,4 @@ metadata: {{- if .Values.serviceAccount.annotations -}} {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/console/values.yaml b/charts/operator-wandb/charts/console/values.yaml index ea23750e..827f646a 100644 --- a/charts/operator-wandb/charts/console/values.yaml +++ b/charts/operator-wandb/charts/console/values.yaml @@ -21,7 +21,9 @@ extraCors: [] common: labels: {} deployment: {} -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} pod: diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml index 18053197..13cca672 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml @@ -3,6 +3,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "flat-run-fields-updater.serviceAccountName" . }} + namespace: {{ $.Release.Namespace }} labels: {{- include "wandb.commonLabels" . | nindent 4 }} {{- include "flat-run-fields-updater.commonLabels" . 
| nindent 4 }} diff --git a/charts/operator-wandb/charts/parquet/templates/deployment.yaml b/charts/operator-wandb/charts/parquet/templates/deployment.yaml index e37e973f..ddfd26cf 100644 --- a/charts/operator-wandb/charts/parquet/templates/deployment.yaml +++ b/charts/operator-wandb/charts/parquet/templates/deployment.yaml @@ -140,6 +140,7 @@ spec: resources: {{- toYaml .Values.resources | nindent 12 }} + serviceAccountName: {{ include "parquet.serviceAccountName" . }} volumes: {{- if ne (include "wandb.redis.caCert" .) "" }} - name: {{ include "parquet.fullname" . }}-redis-ca diff --git a/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml b/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml new file mode 100644 index 00000000..0a1caf7e --- /dev/null +++ b/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "parquet.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "parquet.commonLabels" . | nindent 4 }} + {{- include "parquet.labels" . 
| nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/parquet/values.yaml b/charts/operator-wandb/charts/parquet/values.yaml index 591417fa..547a3700 100644 --- a/charts/operator-wandb/charts/parquet/values.yaml +++ b/charts/operator-wandb/charts/parquet/values.yaml @@ -22,7 +22,9 @@ cronJob: exportHistoryToParquet: enabled: false schedule: "11 * * * *" -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} service: From 0dfc4089f2f8168f2d7f976131ecd699464697ed Mon Sep 17 00:00:00 2001 From: Nick Penaranda Date: Fri, 19 Jul 2024 09:57:17 -0400 Subject: [PATCH 25/28] feat(weave): Add weave-trace (#157) Co-authored-by: Justin Brooks --- charts/operator-wandb/Chart.lock | 7 +- charts/operator-wandb/Chart.yaml | 6 +- .../charts/app/templates/deployment.yaml | 4 + .../charts/weave-trace/.helmignore | 23 +++ .../charts/weave-trace/Chart.yaml | 5 + .../charts/weave-trace/templates/_helpers.tpl | 110 ++++++++++++ .../weave-trace/templates/deployment.yaml | 168 ++++++++++++++++++ .../weave-trace/templates/migrate-hook.yaml | 68 +++++++ .../charts/weave-trace/templates/service.yaml | 24 +++ .../charts/weave-trace/values.yaml | 53 ++++++ .../charts/weave/templates/_helpers.tpl | 18 -- .../charts/weave/templates/deployment.yaml | 6 +- .../operator-wandb/templates/_clickhouse.tpl | 23 +++ .../operator-wandb/templates/clickhouse.yaml | 10 ++ charts/operator-wandb/templates/ingress.yaml | 9 + charts/operator-wandb/values.yaml | 19 ++ 16 files changed, 528 insertions(+), 25 deletions(-) create mode 100644 charts/operator-wandb/charts/weave-trace/.helmignore create mode 100644 charts/operator-wandb/charts/weave-trace/Chart.yaml create mode 100644 
charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl create mode 100644 charts/operator-wandb/charts/weave-trace/templates/deployment.yaml create mode 100644 charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml create mode 100644 charts/operator-wandb/charts/weave-trace/templates/service.yaml create mode 100644 charts/operator-wandb/charts/weave-trace/values.yaml create mode 100644 charts/operator-wandb/templates/_clickhouse.tpl create mode 100644 charts/operator-wandb/templates/clickhouse.yaml diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index fd1f5101..13ec8ee7 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -8,6 +8,9 @@ dependencies: - name: weave repository: file://charts/weave version: 0.1.0 +- name: weave-trace + repository: file://charts/weave-trace + version: 0.1.0 - name: parquet repository: file://charts/parquet version: 0.1.0 @@ -38,5 +41,5 @@ dependencies: - name: yace repository: file://charts/yace version: 0.1.0 -digest: sha256:c12c533a22b6f593a526e3060597ee1591bf5a2cac4ff03c588758e0dbc65d1e -generated: "2024-06-24T16:15:00.442236-07:00" +digest: sha256:bca2b6781737da6806e4485605cf9ce87b1428944b14cb88f082024cc3500bbd +generated: "2024-07-18T01:17:04.532871-04:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 15f5ee5e..bc1bc346 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.15.0 +version: 0.15.1 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -24,6 +24,10 @@ dependencies: version: "*.*.*" repository: file://charts/weave condition: weave.install + - name: weave-trace + version: "*.*.*" + repository: file://charts/weave-trace + condition: weave-trace.install - name: parquet version: "*.*.*" repository: file://charts/parquet diff 
--git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index 13d88a22..5daf17a6 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -114,6 +114,10 @@ spec: value: "http://{{ .Release.Name }}-parquet:8087" - name: PARQUET_ENABLED value: "true" + {{- if index .Values.global "weave-trace" "enabled" }} + - name: WEAVE_TRACES_ENABLED + value: "true" + {{- end }} {{- if ne (include "wandb.redis.password" .) "" }} - name: REDIS_PASSWORD diff --git a/charts/operator-wandb/charts/weave-trace/.helmignore b/charts/operator-wandb/charts/weave-trace/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/weave-trace/Chart.yaml b/charts/operator-wandb/charts/weave-trace/Chart.yaml new file mode 100644 index 00000000..4f8851e8 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: weave-trace +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "1.0.0" diff --git a/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl b/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl new file mode 100644 index 00000000..4b6f1096 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl @@ -0,0 +1,110 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. 
+*/}} +{{- define "weaveTrace.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified name for weave-trace. (Should be something like wandb-weave-trace) +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "weaveTrace.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create a default fully qualified name for the weave-trace migration. (Should be something like wandb-weave-trace-migrate) +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "weaveTraceMigrate.fullname" -}} +{{ printf "%s-migrate" (include "weaveTrace.fullname" .) | trunc 63 | trimSuffix "-" }} +{{- end }} + + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "weaveTrace.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "weaveTrace.labels" -}} +helm.sh/chart: {{ include "weaveTrace.chart" . }} +{{ include "weaveTrace.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "weaveTrace.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "weaveTrace.selectorLabels" -}} +app.kubernetes.io/name: {{ include "weaveTrace.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "weaveTrace.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "weaveTrace.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. +*/}} +{{- define "weaveTrace.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "weaveTrace.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. +*/}} +{{- define "weaveTrace.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} diff --git a/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml new file mode 100644 index 00000000..a606fca2 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml @@ -0,0 +1,168 @@ +{{- $imageCfg := dict "global" $.Values.global.image "local" $.Values.image -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . 
| nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "weaveTrace.labels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "weaveTrace.commonLabels" . | nindent 8 }} + {{- include "weaveTrace.podLabels" . | nindent 8 }} + {{- include "weaveTrace.labels" . | nindent 8 }} + annotations: + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + {{- if .Values.tolerations }} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + {{- if not .Values.migration.useHook }} + initContainers: + - name: {{ include "weaveTraceMigrate.fullname" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - "python" + - "migrator.py" + env: + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} + {{- end }} + containers: + - name: {{ include "weaveTrace.fullname" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + ports: + - name: http + containerPort: 8080 + protocol: TCP + env: + - name: PORT + value: "8080" + - name: API_PATH_PREFIX + value: "/traces" + - name: WANDB_PUBLIC_BASE_URL + value: {{ .Values.global.host }} + - name: WANDB_BASE_URL + value: http://{{ .Release.Name }}-app:8080/ + - name: WF_TRACE_SERVER_URL + value: "{{ .Values.global.host }}/traces" + - name: WF_ENFORCE_PASSWORD_LENGTH + value: "false" + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- if .Values.datadog.enabled }} + - name: DD_SERVICE + value: "{{ .Values.datadog.service }}" + - name: DD_ENV + value: "{{ .Values.datadog.env }}" + - name: DD_TRACE_ENABLED + value: "{{ .Values.datadog.traceEnabled }}" + - name: DD_LOGS_ENABLED + value: "{{ .Values.datadog.logsEnabled }}" + - name: DD_LOGS_INJECTION + value: "{{ .Values.datadog.logsInjection }}" + {{- end }} + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} + {{- if not .Values.datadog.enabled }} + command: + - uvicorn + args: + - "src.trace_server:app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + {{- end }} + livenessProbe: + httpGet: + path: /traces/health + port: http + timeoutSeconds: 2 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /traces/health + port: http + timeoutSeconds: 2 + failureThreshold: 5 + startupProbe: + httpGet: + path: /traces/health + port: http + failureThreshold: 12 + periodSeconds: 10 + + resources: + {{- toYaml .Values.resources | nindent 12 }} +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "weaveTrace.fullname" . }} + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml b/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml new file mode 100644 index 00000000..39931cdb --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml @@ -0,0 +1,68 @@ +{{ if .Values.migration.useHook }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "weaveTraceMigrate.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . 
| nindent 4 }} + {{- if .Values.migration.labels -}} + {{- toYaml .Values.migration.labels | nindent 4 }} + {{- end }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + {{- if .Values.migration.annotations -}} + {{- toYaml .Values.migration.annotations | nindent 4 }} + {{- end }} +spec: + backoffLimit: 0 + activeDeadlineSeconds: 240 + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "weaveTrace.commonLabels" . | nindent 8 }} + {{- include "weaveTrace.podLabels" . | nindent 8 }} + {{- include "weaveTrace.labels" . | nindent 8 }} + {{- if .Values.migration.labels -}} + {{- toYaml .Values.migration.labels | nindent 8 }} + {{- end }} + annotations: + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + {{- if .Values.tolerations }} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - name: {{ include "weaveTraceMigrate.fullname" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - "python" + - "migrator.py" + env: + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} + restartPolicy: "Never" +{{ end }} diff --git a/charts/operator-wandb/charts/weave-trace/templates/service.yaml b/charts/operator-wandb/charts/weave-trace/templates/service.yaml new file mode 100644 index 00000000..5c9d3462 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 8722 + targetPort: 8080 + protocol: TCP + name: weave-trace + selector: + {{- include "weaveTrace.labels" . | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave-trace/values.yaml b/charts/operator-wandb/charts/weave-trace/values.yaml new file mode 100644 index 00000000..410a2411 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/values.yaml @@ -0,0 +1,53 @@ +nameOverride: "" +fullnameOverride: "" + +image: + repository: wandb/weave-trace + tag: latest + pullPolicy: Always + +tolerations: [] + +extraEnv: {} +extraEnvFrom: {} + +extraCors: [] + +common: + labels: {} +deployment: {} +serviceAccount: {} +clusterRole: {} + +service: + type: ClusterIP + annotations: {} + +pod: + securityContext: + runAsNonRoot: true + runAsUser: 999 + runAsGroup: 0 + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1000m + memory: 6Gi + +datadog: + enabled: false + service: "weave-trace" + env: "managed-install" + traceEnabled: false + logsEnabled: false + logsInjection: false + +migration: + # By 
default, use the init container method to migrate clickhouse + # Otherwise, use the helm pre-upgrade hook (may not work for install) + useHook: false \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave/templates/_helpers.tpl b/charts/operator-wandb/charts/weave/templates/_helpers.tpl index f487e845..4e67f8f1 100644 --- a/charts/operator-wandb/charts/weave/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/weave/templates/_helpers.tpl @@ -25,24 +25,6 @@ If release name contains chart name it will be used as a full name. {{- end }} {{- end }} -{{/* -Create a default fully qualified app name. (Should be something like wandb-app) -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "weave.appFullname" -}} -{{- if .Values.app.fullnameOverride }} -{{- .Values.app.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Values.app.serviceName .Values.app.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - {{/* Create chart name and version as used by the chart label. */}} diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index 0999ca80..343d6b26 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -53,16 +53,14 @@ spec: env: - name: ONLY_SERVICE value: weave - - name: WANDB_BASE_URL - value: http://{{ include "weave.appFullname" . 
}}:8080 - name: WANDB_PUBLIC_BASE_URL value: {{ .Values.global.host }} - name: WEAVE_LOG_FORMAT value: json - name: WEAVE_LOCAL_ARTIFACT_DIR value: /vol/weave/cache - - name: WEAVE_AUTH_GRAPHQL_URL - value: http://{{ include "weave.appFullname" . }}:8080/graphql + - name: WANDB_BASE_URL + value: http://{{ .Release.Name }}-app:8080/ - name: WEAVE_SERVER_NUM_WORKERS value: "4" diff --git a/charts/operator-wandb/templates/_clickhouse.tpl b/charts/operator-wandb/templates/_clickhouse.tpl new file mode 100644 index 00000000..5ceca7dd --- /dev/null +++ b/charts/operator-wandb/templates/_clickhouse.tpl @@ -0,0 +1,23 @@ +{{/* +Return the clickhouse password +*/}} +{{- define "wandb.clickhouse.password" -}} +{{ .Values.global.clickhouse.password }} +{{- end -}} + +{{/* +Return name of secret where clickhouse information is stored +*/}} +{{- define "wandb.clickhouse.passwordSecret" -}} +{{- print .Release.Name "-clickhouse" -}} +{{- end -}} + +{{/* +Return the clickhouse host +*/}} +{{- define "wandb.clickhouse.host" -}} +{{- if eq .Values.global.clickhouse.host "" -}} +{{- else -}} +{{ .Values.global.clickhouse.host }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/operator-wandb/templates/clickhouse.yaml b/charts/operator-wandb/templates/clickhouse.yaml new file mode 100644 index 00000000..d4f4a391 --- /dev/null +++ b/charts/operator-wandb/templates/clickhouse.yaml @@ -0,0 +1,10 @@ +--- +{{- $secretName := (include "wandb.clickhouse.passwordSecret" .) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} +data: + CLICKHOUSE_PASSWORD: {{ include "wandb.clickhouse.password" . 
| b64enc }} \ No newline at end of file diff --git a/charts/operator-wandb/templates/ingress.yaml b/charts/operator-wandb/templates/ingress.yaml index 44a184c2..fb23fca3 100644 --- a/charts/operator-wandb/templates/ingress.yaml +++ b/charts/operator-wandb/templates/ingress.yaml @@ -81,6 +81,15 @@ spec: http: paths: {{- include "IngressPath" $dot | nindent 6 }} + {{- if index $.Values "weave-trace" "install" }} + - pathType: Prefix + path: /traces + backend: + service: + name: {{ $.Release.Name }}-weave-trace + port: + number: 8722 + {{- end }} {{- end }} {{- end }} {{- end }} diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 39fc0c97..9b9a54a2 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -63,6 +63,14 @@ global: secret: "" clientId: "" + clickhouse: + install: false + host: "" + port: 8443 + password: "fake" + database: "weave_trace_db" + user: "default" + email: smtp: host: "" @@ -107,6 +115,9 @@ global: # If the topic already exists then changing the number of partitions is not possible. 
runUpdatesShadowNumPartitions: 1 + weave-trace: + enabled: false + ingress: install: true create: true @@ -159,6 +170,14 @@ weave: repository: wandb/local tag: latest +weave-trace: + install: false + image: + repository: wandb/weave-trace + tag: latest + datadog: + enabled: false + console: install: true image: From b0fa669ac07c2c7e746e4ba6dfe037b0cf88d765 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Fri, 19 Jul 2024 09:41:43 -0700 Subject: [PATCH 26/28] fix: Add an SA for parquet-backfill job (#180) --- charts/operator-wandb/charts/parquet/templates/cron.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 6c35a8f5..d593d539 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -136,6 +136,7 @@ spec: fieldPath: status.hostIP {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} + serviceAccountName: {{ include "parquet.serviceAccountName" . }} volumes: {{- if ne (include "wandb.redis.caCert" .) "" }} - name: {{ include "parquet.fullname" . 
}}-redis-ca From 6a16c8d49fa8204adb71211f1c5215cfa330930a Mon Sep 17 00:00:00 2001 From: Josiah Lee Date: Fri, 19 Jul 2024 15:29:31 -0700 Subject: [PATCH 27/28] chore(weave): Revert weave cache import update (#181) --- charts/operator-wandb/Chart.yaml | 2 +- charts/operator-wandb/charts/weave/templates/deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index bc1bc346..6e9715d3 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.15.1 +version: 0.15.2 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index 343d6b26..99c7d6b6 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -90,7 +90,7 @@ spec: - name: {{ include "weave.fullname" . 
}}-cache-clear image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - command: ["python", "-m", "weave.legacy.clear_cache"] + command: ["python", "-m", "weave.clear_cache"] env: - name: WEAVE_LOCAL_ARTIFACT_DIR From b5aceec0a8be7514313e69420ea108a5610e5ce8 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Mon, 22 Jul 2024 10:30:53 -0700 Subject: [PATCH 28/28] fix: Add an SA for weave-trace deployment (#182) --- charts/operator-wandb/Chart.yaml | 2 +- .../weave-trace/templates/deployment.yaml | 1 + .../weave-trace/templates/serviceaccount.yaml | 17 +++++++++++++++++ .../charts/weave-trace/values.yaml | 4 +++- .../charts/weave/templates/deployment.yaml | 2 +- .../charts/weave/templates/serviceaccount.yaml | 17 +++++++++++++++++ charts/operator-wandb/charts/weave/values.yaml | 4 +++- 7 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml create mode 100644 charts/operator-wandb/charts/weave/templates/serviceaccount.yaml diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 6e9715d3..bb341aa8 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.15.2 +version: 0.15.3 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml index a606fca2..653b1ea0 100644 --- a/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml @@ -142,6 +142,7 @@ spec: resources: {{- toYaml .Values.resources | nindent 12 }} + serviceAccountName: {{ include "weaveTrace.serviceAccountName" . 
}} --- apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler diff --git a/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml b/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml new file mode 100644 index 00000000..efd4314d --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "weaveTrace.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/weave-trace/values.yaml b/charts/operator-wandb/charts/weave-trace/values.yaml index 410a2411..44bf08a2 100644 --- a/charts/operator-wandb/charts/weave-trace/values.yaml +++ b/charts/operator-wandb/charts/weave-trace/values.yaml @@ -16,7 +16,9 @@ extraCors: [] common: labels: {} deployment: {} -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} service: diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index 99c7d6b6..b3b7b9a4 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -107,7 +107,7 @@ spec: resources: {{- toYaml .Values.cacheClear.resources | nindent 12 }} - + serviceAccountName: {{ include "weave.serviceAccountName" . 
}} volumes: - name: cache emptyDir: diff --git a/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml b/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml new file mode 100644 index 00000000..f2d37925 --- /dev/null +++ b/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "weave.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weave.commonLabels" . | nindent 4 }} + {{- include "weave.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/weave/values.yaml b/charts/operator-wandb/charts/weave/values.yaml index cb138c3b..06b3ea74 100644 --- a/charts/operator-wandb/charts/weave/values.yaml +++ b/charts/operator-wandb/charts/weave/values.yaml @@ -25,7 +25,9 @@ extraCors: [] common: labels: {} deployment: {} -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} service: