diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fb0114ee..a5922f0d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,2 @@ * @gls4 @jsbroks @nfoucha @vanpelt -/charts/launch-agent/ @bcsherma @gtarpenning @KyleGoyette @nickpenaranda @TimH98 @wandb-zacharyblasczyk \ No newline at end of file +/charts/launch-agent/ @bcsherma @KyleGoyette @TimH98 \ No newline at end of file diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 2df6c49a..b136846d 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -3,7 +3,7 @@ name: launch-agent icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.13.5 +version: 0.13.10 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/README.md b/charts/launch-agent/README.md index cb3c46f4..bc270908 100644 --- a/charts/launch-agent/README.md +++ b/charts/launch-agent/README.md @@ -57,6 +57,7 @@ The table below describes all the available variables in the chart: | `launchConfig` | mutiline string | **Yes** | `null` | This should be set to the literal contents of your launch agent config. See the agent setup docs for details: https://docs.wandb.ai/guides/launch/setup-agent-advanced | | `volcano` | bool | No | `true` | Controls whether the volcano scheduler should be installed in your cluster along with the agent. Set to `false` to disable volcano installation. | | `gitCreds` | mutiline string | No | `null` | Contents of a git credentials file. | +| `sshAuthSecrets` | list(object) | No | `[]` | Name of secret containing an ssh-auth kubernetes secret and the associated host for the ssh key. | | `serviceAccount.annotations` | object | No | `null` | Annotations for the wandb service account. 
| | `azureStorageAccessKey` | string | No | "" | Azure storage access key required for kaniko to acces build contexts in azure blob storage. | | `additionalEnvVars` | map(string) | No | {} | Map with environment variables to be set in the Launch Agent pod. | diff --git a/charts/launch-agent/templates/configmap.yaml b/charts/launch-agent/templates/configmap.yaml index 6f5d3224..7af1d06f 100644 --- a/charts/launch-agent/templates/configmap.yaml +++ b/charts/launch-agent/templates/configmap.yaml @@ -8,4 +8,21 @@ kind: ConfigMap metadata: name: wandb-launch-configmap-{{ .Release.Name }} namespace: {{ .Values.namespace }} -... \ No newline at end of file +--- +{{- if .Values.sshAuthSecrets }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ssh-config-{{ .Release.Name }} + namespace: {{ .Values.namespace }} +data: + config: | + {{- range $index, $secret := .Values.sshAuthSecrets }} + Host {{ .host }} + IdentityFile /home/launch_agent/.ssh/id_repo{{ $index }} + IdentitiesOnly yes + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + {{- end }} +{{- end }} diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 15d1e754..851a051a 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -18,13 +18,23 @@ spec: metadata: labels: app: launch-agent-{{ .Release.Name }} -{{- if .Values.agent.labels }} -{{- toYaml .Values.agent.labels | trim | nindent 8 }} -{{- end }} + {{- if .Values.agent.labels }} + {{- toYaml .Values.agent.labels | trim | nindent 8 }} + {{- end }} spec: serviceAccountName: wandb-launch-serviceaccount-{{ .Release.Name }} - {{- if .Values.kanikoPvcName }} + {{- if or .Values.sshAuthSecrets .Values.kanikoPvcName }} initContainers: + {{- end}} + {{- if .Values.sshAuthSecrets }} + - name: init-create-ssh-dir + image: {{ .Values.agent.image }} + command: ["sh", "-c", "mkdir -p /home/launch_agent/.ssh"] + volumeMounts: + - name: ssh-dir + 
mountPath: /home/launch_agent/.ssh + {{- end }} + {{- if .Values.kanikoPvcName }} - name: kaniko-volume-chown image: {{ .Values.agent.image }} command: ["sh", "-c"] @@ -101,12 +111,31 @@ spec: value: {{ .Values.kanikoDockerConfigSecret }} {{- end }} volumeMounts: + - name: ssh-dir + mountPath: /home/launch_agent/.ssh - name: wandb-launch-config mountPath: /home/launch_agent/.config/wandb readOnly: true {{ if .Values.gitCreds}} - name: git-creds - mountPath: /home/launch_agent/ + mountPath: /home/launch_agent/.gitconfig + subPath: .gitconfig + readOnly: true + - name: git-creds + mountPath: /home/launch_agent/.git-credentials + subPath: .git-credentials + readOnly: true + {{ end }} + {{ if .Values.sshAuthSecrets }} + {{- range $index, $secret := .Values.sshAuthSecrets }} + - name: git-ssh-key-secret-{{ $index }} + mountPath: /home/launch_agent/.ssh/id_repo{{ $index }} + subPath: id_repo{{ $index }} + readOnly: true + {{- end }} + - name: ssh-config + mountPath: /home/launch_agent/.ssh/config + subPath: config readOnly: true {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} @@ -120,6 +149,8 @@ spec: mountPath: /home/launch_agent/kaniko {{ end }} volumes: + - name: ssh-dir + emptyDir: {} - name: wandb-launch-config configMap: name: wandb-launch-configmap-{{ .Release.Name }} @@ -127,10 +158,23 @@ spec: - name: git-creds secret: secretName: git-creds - - name: git-config - secret: - secretName: git-config {{ end}} + {{ if .Values.sshAuthSecrets }} + {{- range $index, $secret := .Values.sshAuthSecrets }} + - name: git-ssh-key-secret-{{ $index }} + secret: + secretName: {{ $secret.name }} + items: + - key: ssh-privatekey + path: id_repo{{ $index }} + {{- end }} + - name: ssh-config + configMap: + name: ssh-config-{{ .Release.Name }} + items: + - key: config + path: config + {{ end }} {{- if and .Values.customCABundle.configMap.name .Values.customCABundle.configMap.key }} - name: custom-cabundle configMap: @@ -138,12 +182,12 
@@ spec: items: - key: {{ .Values.customCABundle.configMap.key }} path: custom-ca.crt - {{- end}} + {{- end }} {{ if .Values.kanikoPvcName }} - name: kaniko-pvc persistentVolumeClaim: claimName: {{ .Values.kanikoPvcName }} - {{- end}} + {{- end }} nodeSelector: {{- toYaml .Values.agent.nodeSelector | nindent 8 }} tolerations: diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 52b0a8c5..5872081e 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -5,7 +5,7 @@ agent: # Providing API key can be done external to this chart useExternalWandbSecret: false # Container image to use for the agent. - image: wandb/launch-agent:0.17.1 + image: wandb/launch-agent:0.17.3 # Image pull policy for agent image. imagePullPolicy: Always # Resources block for the agent spec. @@ -63,6 +63,14 @@ additionalSecretEnvVars: # repos. Example: https://username:password@example.com gitCreds: | +# List of secrets for the agent to use for ssh auth. +# Format is a list of objects, each with a secret `name` and the `host` it is used for. +# Secrets should be created as ssh-auth secrets, +# see: https://kubernetes.io/docs/concepts/configuration/secret/#ssh-authentication-secrets +sshAuthSecrets: [] + # - name: secret-name + # host: example.com + # Annotations for the wandb service account. Useful when setting up workload identity on gcp. 
serviceAccount: annotations: diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index 6cc7c094..13ec8ee7 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -8,6 +8,9 @@ dependencies: - name: weave repository: file://charts/weave version: 0.1.0 +- name: weave-trace + repository: file://charts/weave-trace + version: 0.1.0 - name: parquet repository: file://charts/parquet version: 0.1.0 @@ -35,5 +38,8 @@ dependencies: - name: stackdriver repository: file://charts/stackdriver version: 0.1.0 -digest: sha256:9a6c69506deb6969686d5b220a0692b53cfa29642e059bdf27c440c5d7086bdb -generated: "2024-06-05T11:04:02.508473-07:00" +- name: yace + repository: file://charts/yace + version: 0.1.0 +digest: sha256:bca2b6781737da6806e4485605cf9ce87b1428944b14cb88f082024cc3500bbd +generated: "2024-07-18T01:17:04.532871-04:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index d68a4a15..bb341aa8 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.13.14 +version: 0.15.3 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg @@ -24,6 +24,10 @@ dependencies: version: "*.*.*" repository: file://charts/weave condition: weave.install + - name: weave-trace + version: "*.*.*" + repository: file://charts/weave-trace + condition: weave-trace.install - name: parquet version: "*.*.*" repository: file://charts/parquet @@ -60,3 +64,7 @@ dependencies: version: "*.*.*" repository: file://charts/stackdriver condition: stackdriver.install + - name: yace + version: "*.*.*" + repository: file://charts/yace + condition: yace.install diff --git a/charts/operator-wandb/charts/app/templates/_helpers.tpl b/charts/operator-wandb/charts/app/templates/_helpers.tpl index 589cf9f0..b001fe0c 100644 --- 
a/charts/operator-wandb/charts/app/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/app/templates/_helpers.tpl @@ -116,7 +116,7 @@ app deployments. {{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index c543eccc..5daf17a6 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -63,7 +63,7 @@ spec: secretKeyRef: name: {{ include "wandb.mysql.passwordSecret" . }} key: MYSQL_PASSWORD - command: ['bash', '-c', "until mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASSWORD -D$MYSQL_DATABASE --execute=\"SELECT 1\"; do echo waiting for db; sleep 2; done"] + command: ['bash', '-c', "until mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASSWORD -D$MYSQL_DATABASE -P$MYSQL_PORT --execute=\"SELECT 1\"; do echo waiting for db; sleep 2; done"] containers: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" @@ -114,6 +114,10 @@ spec: value: "http://{{ .Release.Name }}-parquet:8087" - name: PARQUET_ENABLED value: "true" + {{- if index .Values.global "weave-trace" "enabled" }} + - name: WEAVE_TRACES_ENABLED + value: "true" + {{- end }} {{- if ne (include "wandb.redis.password" .) 
"" }} - name: REDIS_PASSWORD @@ -227,6 +231,8 @@ spec: key: KAFKA_CLIENT_PASSWORD - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} + - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS + value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: OVERFLOW_BUCKET_ADDR value: "{{ include "app.bucket" .}}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE @@ -237,7 +243,7 @@ spec: "name": "wandb", "prefix": "wandb-overflow" }, - "addr": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@$(KAFKA_BROKER_HOST):$(KAFKA_BROKER_PORT)/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?producer_batch_bytes=1048576" + "addr": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@$(KAFKA_BROKER_HOST):$(KAFKA_BROKER_PORT)/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?producer_batch_bytes=1048576&num_partitions=$(KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS)" } {{- include "app.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 12 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} diff --git a/charts/operator-wandb/charts/app/templates/serviceaccount.yaml b/charts/operator-wandb/charts/app/templates/serviceaccount.yaml index c3824d12..f0639978 100644 --- a/charts/operator-wandb/charts/app/templates/serviceaccount.yaml +++ b/charts/operator-wandb/charts/app/templates/serviceaccount.yaml @@ -1,3 +1,4 @@ +{{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: @@ -14,3 +15,4 @@ metadata: {{- if .Values.serviceAccount.annotations -}} {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} {{- end }} + {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/app/values.yaml b/charts/operator-wandb/charts/app/values.yaml index 9afff84c..ffc8d8fb 100644 --- a/charts/operator-wandb/charts/app/values.yaml +++ b/charts/operator-wandb/charts/app/values.yaml @@ -53,6 +53,7 @@ resources: serviceAccount: create: true + annotations: {} role: {} roleBinding: {} diff --git a/charts/operator-wandb/charts/console/templates/deployment.yaml b/charts/operator-wandb/charts/console/templates/deployment.yaml index 835a727d..54c983b4 100644 --- a/charts/operator-wandb/charts/console/templates/deployment.yaml +++ b/charts/operator-wandb/charts/console/templates/deployment.yaml @@ -34,7 +34,7 @@ spec: {{- toYaml .Values.pod.annotations | nindent 4 }} {{- end }} spec: - serviceAccountName: {{ include "console.fullname" . }} + serviceAccountName: {{ include "console.serviceAccountName" . 
}} {{- if .tolerations }} tolerations: {{- toYaml .tolerations | nindent 8 }} diff --git a/charts/operator-wandb/charts/console/templates/service.yaml b/charts/operator-wandb/charts/console/templates/service.yaml index 71c3a6a8..b4a4ab12 100644 --- a/charts/operator-wandb/charts/console/templates/service.yaml +++ b/charts/operator-wandb/charts/console/templates/service.yaml @@ -11,7 +11,10 @@ metadata: {{- toYaml .Values.service.labels | nindent 4 }} {{- end }} annotations: - {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if eq .Values.global.cloudProvider "aws" }} + alb.ingress.kubernetes.io/healthcheck-path: /console/api/ready + {{- end }} + {{- include "wandb.serviceAnnotations" $ | nindent 4 }} {{- if .Values.service.annotations -}} {{- toYaml .Values.service.annotations | nindent 4 }} {{- end }} diff --git a/charts/operator-wandb/charts/console/templates/serviceaccount.yaml b/charts/operator-wandb/charts/console/templates/serviceaccount.yaml index 2eea38c2..3cda2894 100644 --- a/charts/operator-wandb/charts/console/templates/serviceaccount.yaml +++ b/charts/operator-wandb/charts/console/templates/serviceaccount.yaml @@ -1,3 +1,4 @@ +{{- if .Values.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount metadata: @@ -14,3 +15,4 @@ metadata: {{- if .Values.serviceAccount.annotations -}} {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/console/values.yaml b/charts/operator-wandb/charts/console/values.yaml index ea23750e..827f646a 100644 --- a/charts/operator-wandb/charts/console/values.yaml +++ b/charts/operator-wandb/charts/console/values.yaml @@ -21,7 +21,9 @@ extraCors: [] common: labels: {} deployment: {} -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} pod: diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl 
b/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl index a52b8aa3..61c54a41 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/_helpers.tpl @@ -112,7 +112,7 @@ Create the name of the service account to use {{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml index e08f15de..01fe5040 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/deployment.yaml @@ -45,12 +45,20 @@ spec: {{- include "wandb.nodeSelector" . | nindent 6 }} {{- include "wandb.priorityClassName" . | nindent 6 }} {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + # needed to ensure Kafka consumers handle pod termination gracefully and avoid data loss + terminationGracePeriodSeconds: 60 containers: - name: {{ .Chart.Name }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "flat-run-fields-updater.fullname" . 
}}-redis-ca + mountPath: /etc/ssl/certs/redis_ca.pem + subPath: redis_ca.pem + {{- end }} env: - name: POD_NAME valueFrom: @@ -108,8 +116,14 @@ spec: key: KAFKA_CLIENT_PASSWORD - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE value: {{ include "wandb.kafka.runUpdatesShadowTopic" .}} + - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS + value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}" - name: BUCKET value: "{{ include "flat-run-fields-updater.bucket" .}}" + - name: AWS_REGION + value: {{ .Values.global.bucket.region }} + - name: AWS_S3_KMS_ID + value: "{{ .Values.global.bucket.kmsKey }}" - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE value: > { @@ -119,7 +133,7 @@ spec: "prefix": "wandb-overflow" }, "subscriptions": { - "flatRunFieldsUpdater": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@wandb-kafka:9092/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?consumer_group_id=default-group" + "flatRunFieldsUpdater": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@wandb-kafka:9092/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?consumer_group_id=default-group&num_partitions=$(KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS)" } } {{- if ne (include "wandb.redis.password" .) "" }} @@ -157,4 +171,13 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} + volumes: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "flat-run-fields-updater.fullname" . }}-redis-ca + secret: + secretName: "{{ include "wandb.redis.passwordSecret" . 
}}" + items: + - key: REDIS_CA_CERT + path: redis_ca.pem + {{- end }} {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml b/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml index 18053197..13cca672 100644 --- a/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml +++ b/charts/operator-wandb/charts/flat-run-fields-updater/templates/serviceaccount.yaml @@ -3,6 +3,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "flat-run-fields-updater.serviceAccountName" . }} + namespace: {{ $.Release.Namespace }} labels: {{- include "wandb.commonLabels" . | nindent 4 }} {{- include "flat-run-fields-updater.commonLabels" . | nindent 4 }} diff --git a/charts/operator-wandb/charts/parquet/templates/_helpers.tpl b/charts/operator-wandb/charts/parquet/templates/_helpers.tpl index c2a077e3..32414268 100644 --- a/charts/operator-wandb/charts/parquet/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/parquet/templates/_helpers.tpl @@ -116,7 +116,7 @@ app deployments. {{- $bucket = printf "az://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "gcs" -}} -{{- $bucket = printf "gs://%s" .Values.global.bucket.name -}} +{{- $bucket = printf "gs://%s/%s" .Values.global.bucket.name .Values.global.bucket.path -}} {{- end -}} {{- if eq .Values.global.bucket.provider "s3" -}} {{- if and .Values.global.bucket.accessKey .Values.global.bucket.secretKey -}} diff --git a/charts/operator-wandb/charts/parquet/templates/cron.yaml b/charts/operator-wandb/charts/parquet/templates/cron.yaml index 4d082e29..d593d539 100644 --- a/charts/operator-wandb/charts/parquet/templates/cron.yaml +++ b/charts/operator-wandb/charts/parquet/templates/cron.yaml @@ -49,6 +49,12 @@ spec: "megabinary", "glue", ] + volumeMounts: + {{- if ne (include "wandb.redis.caCert" .) 
"" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + mountPath: /etc/ssl/certs/redis_ca.pem + subPath: redis_ca.pem + {{- end }} env: - name: GORILLA_GLUE_EXECUTE value: "true" @@ -130,5 +136,15 @@ spec: fieldPath: status.hostIP {{- include "parquet.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 16 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 16 }} + serviceAccountName: {{ include "parquet.serviceAccountName" . }} + volumes: + {{- if ne (include "wandb.redis.caCert" .) "" }} + - name: {{ include "parquet.fullname" . }}-redis-ca + secret: + secretName: "{{ include "wandb.redis.passwordSecret" . }}" + items: + - key: REDIS_CA_CERT + path: redis_ca.pem + {{- end }} restartPolicy: Never {{- end }} diff --git a/charts/operator-wandb/charts/parquet/templates/deployment.yaml b/charts/operator-wandb/charts/parquet/templates/deployment.yaml index e37e973f..ddfd26cf 100644 --- a/charts/operator-wandb/charts/parquet/templates/deployment.yaml +++ b/charts/operator-wandb/charts/parquet/templates/deployment.yaml @@ -140,6 +140,7 @@ spec: resources: {{- toYaml .Values.resources | nindent 12 }} + serviceAccountName: {{ include "parquet.serviceAccountName" . }} volumes: {{- if ne (include "wandb.redis.caCert" .) "" }} - name: {{ include "parquet.fullname" . }}-redis-ca diff --git a/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml b/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml new file mode 100644 index 00000000..0a1caf7e --- /dev/null +++ b/charts/operator-wandb/charts/parquet/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "parquet.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "parquet.commonLabels" . | nindent 4 }} + {{- include "parquet.labels" . 
| nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/parquet/values.yaml b/charts/operator-wandb/charts/parquet/values.yaml index 591417fa..547a3700 100644 --- a/charts/operator-wandb/charts/parquet/values.yaml +++ b/charts/operator-wandb/charts/parquet/values.yaml @@ -22,7 +22,9 @@ cronJob: exportHistoryToParquet: enabled: false schedule: "11 * * * *" -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} service: diff --git a/charts/operator-wandb/charts/weave-trace/.helmignore b/charts/operator-wandb/charts/weave-trace/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/weave-trace/Chart.yaml b/charts/operator-wandb/charts/weave-trace/Chart.yaml new file mode 100644 index 00000000..4f8851e8 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: weave-trace +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "1.0.0" diff --git a/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl b/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl new file mode 100644 index 00000000..4b6f1096 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/_helpers.tpl @@ -0,0 +1,110 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "weaveTrace.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified name for weave-trace. (Should be something like wandb-weave-trace) +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "weaveTrace.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create a default fully qualified name for the weave-trace migration. 
(Should be something like wandb-weave-trace-migrate) +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "weaveTraceMigrate.fullname" -}} +{{ printf "%s-migrate" (include "weaveTrace.fullname" .) | trunc 63 | trimSuffix "-" }} +{{- end }} + + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "weaveTrace.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "weaveTrace.labels" -}} +helm.sh/chart: {{ include "weaveTrace.chart" . }} +{{ include "weaveTrace.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "weaveTrace.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "weaveTrace.selectorLabels" -}} +app.kubernetes.io/name: {{ include "weaveTrace.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "weaveTrace.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "weaveTrace.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. +*/}} +{{- define "weaveTrace.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. 
+*/}} +{{- define "weaveTrace.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. +*/}} +{{- define "weaveTrace.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} diff --git a/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml new file mode 100644 index 00000000..653b1ea0 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/deployment.yaml @@ -0,0 +1,169 @@ +{{- $imageCfg := dict "global" $.Values.global.image "local" $.Values.image -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "weaveTrace.selectorLabels" . | nindent 6 }}{{/* selector is immutable after creation: keep the chart/app version labels from weaveTrace.labels out of it */}} + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "weaveTrace.commonLabels" . | nindent 8 }} + {{- include "weaveTrace.podLabels" . | nindent 8 }} + {{- include "weaveTrace.labels" . 
| nindent 8 }} + annotations: + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }}{{/* same depth as the pod labels above, which use nindent 8 */}} + {{- end }} + spec: + {{- if .Values.tolerations }}{{/* the root context has no .tolerations key; NOTE(review): confirm the values key name */}} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + {{- if not .Values.migration.useHook }} + initContainers: + - name: {{ include "weaveTraceMigrate.fullname" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - "python" + - "migrator.py" + env: + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 12 }} + {{- end }} + containers: + - name: {{ include "weaveTrace.fullname" . 
}} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + ports: + - name: http + containerPort: 8080 + protocol: TCP + env: + - name: PORT + value: "8080" + - name: API_PATH_PREFIX + value: "/traces" + - name: WANDB_PUBLIC_BASE_URL + value: {{ .Values.global.host }} + - name: WANDB_BASE_URL + value: http://{{ .Release.Name }}-app:8080/ + - name: WF_TRACE_SERVER_URL + value: "{{ .Values.global.host }}/traces" + - name: WF_ENFORCE_PASSWORD_LENGTH + value: "false" + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- if .Values.datadog.enabled }} + - name: DD_SERVICE + value: "{{ .Values.datadog.service }}" + - name: DD_ENV + value: "{{ .Values.datadog.env }}" + - name: DD_TRACE_ENABLED + value: "{{ .Values.datadog.traceEnabled }}" + - name: DD_LOGS_ENABLED + value: "{{ .Values.datadog.logsEnabled }}" # NOTE(review): read from the datadog block like the settings above; confirm the key exists in values.yaml + - name: DD_LOGS_INJECTION + value: "{{ .Values.datadog.logsInjection }}" # NOTE(review): same assumption as DD_LOGS_ENABLED + {{- end }} + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} + {{- if not .Values.datadog.enabled }} + command: + - uvicorn + args: + - "src.trace_server:app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + {{- end }} + livenessProbe: + httpGet: + path: /traces/health + port: http + timeoutSeconds: 2 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /traces/health + port: http + timeoutSeconds: 2 + failureThreshold: 5 + startupProbe: + httpGet: + path: /traces/health + port: http + failureThreshold: 12 + periodSeconds: 10 + + resources: + {{- toYaml .Values.resources | nindent 12 }} + serviceAccountName: {{ include "weaveTrace.serviceAccountName" . }} +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "weaveTrace.fullname" . }} + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml b/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml new file mode 100644 index 00000000..39931cdb --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/migrate-hook.yaml @@ -0,0 +1,68 @@ +{{ if .Values.migration.useHook }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "weaveTraceMigrate.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . 
| nindent 4 }} + {{- if .Values.migration.labels -}} + {{- toYaml .Values.migration.labels | nindent 4 }} + {{- end }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + {{- if .Values.migration.annotations -}} + {{- toYaml .Values.migration.annotations | nindent 4 }} + {{- end }} +spec: + backoffLimit: 0 + activeDeadlineSeconds: 240 + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "weaveTrace.commonLabels" . | nindent 8 }} + {{- include "weaveTrace.podLabels" . | nindent 8 }} + {{- include "weaveTrace.labels" . | nindent 8 }} + {{- if .Values.migration.labels -}} + {{- toYaml .Values.migration.labels | nindent 8 }} + {{- end }} + annotations: + {{- if .Values.pod.annotations -}}{{/* rendered under template.metadata.annotations, same depth as the pod labels above */}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + {{- if .Values.tolerations }}{{/* tolerations is a chart value (values.yaml: tolerations) */}} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - name: {{ include "weaveTraceMigrate.fullname" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - "python" + - "migrator.py" + env: + - name: WF_CLICKHOUSE_HOST + value: "{{ .Values.global.clickhouse.host }}" + - name: WF_CLICKHOUSE_PORT + value: "{{ .Values.global.clickhouse.port }}" + - name: WF_CLICKHOUSE_DATABASE + value: "{{ .Values.global.clickhouse.database }}" + - name: WF_CLICKHOUSE_USER + value: "{{ .Values.global.clickhouse.user }}" + - name: WF_CLICKHOUSE_PASS + valueFrom: + secretKeyRef: + name: {{ include "wandb.clickhouse.passwordSecret" . }} + key: CLICKHOUSE_PASSWORD + {{- include "weaveTrace.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} + {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) 
| nindent 12 }} + restartPolicy: "Never" +{{ end }} diff --git a/charts/operator-wandb/charts/weave-trace/templates/service.yaml b/charts/operator-wandb/charts/weave-trace/templates/service.yaml new file mode 100644 index 00000000..5c9d3462 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "weaveTrace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 8722 + targetPort: 8080 + protocol: TCP + name: weave-trace + selector: + {{- include "weaveTrace.labels" . | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml b/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml new file mode 100644 index 00000000..efd4314d --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "weaveTrace.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.commonLabels" . | nindent 4 }} + {{- include "weaveTrace.labels" . 
| nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/weave-trace/values.yaml b/charts/operator-wandb/charts/weave-trace/values.yaml new file mode 100644 index 00000000..44bf08a2 --- /dev/null +++ b/charts/operator-wandb/charts/weave-trace/values.yaml @@ -0,0 +1,55 @@ +nameOverride: "" +fullnameOverride: "" + +image: + repository: wandb/weave-trace + tag: latest + pullPolicy: Always + +tolerations: [] + +extraEnv: {} +extraEnvFrom: {} + +extraCors: [] + +common: + labels: {} +deployment: {} +serviceAccount: + create: true + annotations: {} +clusterRole: {} + +service: + type: ClusterIP + annotations: {} + +pod: + securityContext: + runAsNonRoot: true + runAsUser: 999 + runAsGroup: 0 + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1000m + memory: 6Gi + +datadog: + enabled: false + service: "weave-trace" + env: "managed-install" + traceEnabled: false + logsEnabled: false + logsInjection: false + +migration: + # By default, use the init container method to migrate clickhouse + # Otherwise, use the helm pre-upgrade hook (may not work for install) + useHook: false \ No newline at end of file diff --git a/charts/operator-wandb/charts/weave/templates/_helpers.tpl b/charts/operator-wandb/charts/weave/templates/_helpers.tpl index f487e845..4e67f8f1 100644 --- a/charts/operator-wandb/charts/weave/templates/_helpers.tpl +++ b/charts/operator-wandb/charts/weave/templates/_helpers.tpl @@ -25,24 +25,6 @@ If release name contains chart name it will be used as a full name. {{- end }} {{- end }} -{{/* -Create a default fully qualified app name. 
(Should be something like wandb-app) -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "weave.appFullname" -}} -{{- if .Values.app.fullnameOverride }} -{{- .Values.app.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Values.app.serviceName .Values.app.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - {{/* Create chart name and version as used by the chart label. */}} diff --git a/charts/operator-wandb/charts/weave/templates/deployment.yaml b/charts/operator-wandb/charts/weave/templates/deployment.yaml index a289ca9e..b3b7b9a4 100644 --- a/charts/operator-wandb/charts/weave/templates/deployment.yaml +++ b/charts/operator-wandb/charts/weave/templates/deployment.yaml @@ -53,14 +53,14 @@ spec: env: - name: ONLY_SERVICE value: weave - - name: WANDB_BASE_URL + - name: WANDB_PUBLIC_BASE_URL value: {{ .Values.global.host }} - name: WEAVE_LOG_FORMAT value: json - name: WEAVE_LOCAL_ARTIFACT_DIR value: /vol/weave/cache - - name: WEAVE_AUTH_GRAPHQL_URL - value: http://{{ include "weave.appFullname" . }}.{{ $.Release.Namespace }}.svc.{{ .Values.app.clusterDomain }}:8080/graphql + - name: WANDB_BASE_URL + value: http://{{ .Release.Name }}-app:8080/ - name: WEAVE_SERVER_NUM_WORKERS value: "4" @@ -105,6 +105,9 @@ spec: - name: cache mountPath: /vol/weave/cache + resources: + {{- toYaml .Values.cacheClear.resources | nindent 12 }} + serviceAccountName: {{ include "weave.serviceAccountName" . 
}} volumes: - name: cache emptyDir: diff --git a/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml b/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml new file mode 100644 index 00000000..f2d37925 --- /dev/null +++ b/charts/operator-wandb/charts/weave/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "weave.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "weave.commonLabels" . | nindent 4 }} + {{- include "weave.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/operator-wandb/charts/weave/values.yaml b/charts/operator-wandb/charts/weave/values.yaml index 542fd2c4..06b3ea74 100644 --- a/charts/operator-wandb/charts/weave/values.yaml +++ b/charts/operator-wandb/charts/weave/values.yaml @@ -25,7 +25,9 @@ extraCors: [] common: labels: {} deployment: {} -serviceAccount: {} +serviceAccount: + create: true + annotations: {} clusterRole: {} service: @@ -57,3 +59,9 @@ cache: intervalInHours: 24 size: 20Gi medium: "" + +cacheClear: + resources: + requests: + cpu: 100m + memory: 128Mi \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/.helmignore b/charts/operator-wandb/charts/yace/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/yace/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/yace/Chart.yaml b/charts/operator-wandb/charts/yace/Chart.yaml new file mode 100644 index 00000000..12bf5488 --- /dev/null +++ b/charts/operator-wandb/charts/yace/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: yace +description: A Helm chart for Kubernetes +version: 0.1.0 +appVersion: "v0.60.0" diff --git a/charts/operator-wandb/charts/yace/templates/_helpers.tpl b/charts/operator-wandb/charts/yace/templates/_helpers.tpl new file mode 100644 index 00000000..351bdda5 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/_helpers.tpl @@ -0,0 +1,102 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "yace.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "yace.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "yace.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "yace.labels" -}} +helm.sh/chart: {{ include "yace.chart" . 
}} +{{ include "yace.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "yace.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "yace.selectorLabels" -}} +app.kubernetes.io/name: {{ include "yace.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "yace.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "yace.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Global values will override any chart-specific values. +*/}} +{{- define "yace.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "yace.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. 
+*/}} +{{- define "yace.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} + diff --git a/charts/operator-wandb/charts/yace/templates/configmap.yaml b/charts/operator-wandb/charts/yace/templates/configmap.yaml new file mode 100644 index 00000000..42fd5728 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- if .Values.configMap.labels -}} + {{- toYaml .Values.configMap.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.configMap.annotations -}} + {{- toYaml .Values.configMap.annotations | nindent 4 }} + {{- end }} +data: + config.yml: | +{{- (tpl .Values.config $) | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/deployment.yaml b/charts/operator-wandb/charts/yace/templates/deployment.yaml new file mode 100644 index 00000000..4534f6c0 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/deployment.yaml @@ -0,0 +1,84 @@ +{{- if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- if .Values.deployment.labels -}} + {{- toYaml .Values.deployment.labels | nindent 4 }} + {{- end }} + annotations: + {{- include "wandb.deploymentAnnotations" $ | nindent 4 }} + {{- if .Values.deployment.annotations -}} + {{- toYaml .Values.deployment.annotations | nindent 4 }} + {{- end }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "wandb.selectorLabels" $ | nindent 6 }} + {{- include "yace.labels" . 
| nindent 6 }} + template: + metadata: + labels: + {{- include "wandb.podLabels" . | nindent 8 }} + {{- include "yace.commonLabels" . | nindent 8 }} + {{- include "yace.podLabels" . | nindent 8 }} + {{- include "yace.labels" . | nindent 8 }} + annotations: + checksum/configmap: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum | trunc 63 }} + {{- if .Values.pod.annotations -}} + {{- toYaml .Values.pod.annotations | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "yace.serviceAccountName" . }} + {{- if .Values.tolerations }}{{/* tolerations is a chart value (values.yaml: tolerations) */}} + tolerations: + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- include "wandb.nodeSelector" . | nindent 6 }} + {{- include "wandb.priorityClassName" . | nindent 6 }} + {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + command: + - yace + - --config.file=/config/config.yml + - --scraping-interval=60 + ports: + - containerPort: 5000 + name: http + protocol: TCP + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: http + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - mountPath: /config + name: yace-config + volumes: + - configMap: + defaultMode: 420 + name: {{ include "yace.fullname" . 
}} + name: yace-config +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/service.yaml b/charts/operator-wandb/charts/yace/templates/service.yaml new file mode 100644 index 00000000..268d9b22 --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "yace.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- if .Values.service.labels -}} + {{- toYaml .Values.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.service.annotations -}} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 5000 + protocol: TCP + name: yace + selector: + {{- include "yace.labels" . | nindent 4 }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml b/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml new file mode 100644 index 00000000..69eddbba --- /dev/null +++ b/charts/operator-wandb/charts/yace/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "yace.serviceAccountName" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "yace.commonLabels" . | nindent 4 }} + {{- include "yace.labels" . 
| nindent 4 }} + {{- if .Values.serviceAccount.labels -}} + {{- toYaml .Values.serviceAccount.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.serviceAccount.annotations -}} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/charts/yace/values.yaml b/charts/operator-wandb/charts/yace/values.yaml new file mode 100644 index 00000000..e716a19c --- /dev/null +++ b/charts/operator-wandb/charts/yace/values.yaml @@ -0,0 +1,105 @@ +enabled: true + +nameOverride: "" +fullnameOverride: "" + +image: + registry: ghcr.io + repository: nerdswords/yet-another-cloudwatch-exporter + pullPolicy: IfNotPresent + # Image tag to deploy; the deployment template reads this value directly (no fallback to the chart appVersion). + tag: v0.60.0 + +# Tolerations for pod scheduling +tolerations: [] + +extraEnv: {} +extraEnvFrom: {} + + +config: |- + apiVersion: v1alpha1 + discovery: + jobs: + - type: AWS/ElastiCache + regions: + {{- range .Values.regions }} + - {{ . }} + {{- end }} + period: 60 + length: 60 + metrics: + - name: CPUUtilization + statistics: [Average] + - name: FreeableMemory + statistics: [Average] + - name: NetworkBytesIn + statistics: [Average] + - name: NetworkBytesOut + statistics: [Average] + - name: NetworkPacketsIn + statistics: [Average] + - name: NetworkPacketsOut + statistics: [Average] + - name: SwapUsage + statistics: [Average] + - name: CPUCreditUsage + statistics: [Average] + - type: AWS/RDS + regions: + {{- range .Values.regions }} + - {{ . 
}} + {{- end }} + period: 60 + length: 60 + metrics: + - name: CPUUtilization + statistics: [Maximum] + - name: DatabaseConnections + statistics: [Sum] + - name: FreeableMemory + statistics: [Average] + - name: FreeStorageSpace + statistics: [Average] + - name: ReadThroughput + statistics: [Average] + - name: WriteThroughput + statistics: [Average] + - name: ReadLatency + statistics: [Maximum] + - name: WriteLatency + statistics: [Maximum] + - name: ReadIOPS + statistics: [Average] + - name: WriteIOPS + statistics: [Average] +service: + type: ClusterIP + annotations: {} + +pod: + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" + labels: {} + +deployment: {} + +serviceAccount: + create: true + +common: + labels: {} + annotations: {} + +configMap: + labels: {} + annotations: {} + +resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 500m + memory: 500Mi diff --git a/charts/operator-wandb/templates/_clickhouse.tpl b/charts/operator-wandb/templates/_clickhouse.tpl new file mode 100644 index 00000000..5ceca7dd --- /dev/null +++ b/charts/operator-wandb/templates/_clickhouse.tpl @@ -0,0 +1,23 @@ +{{/* +Return the clickhouse password (global.clickhouse.password) +*/}} +{{- define "wandb.clickhouse.password" -}} +{{ .Values.global.clickhouse.password }} +{{- end -}} + +{{/* +Return name of secret where clickhouse information is stored +*/}} +{{- define "wandb.clickhouse.passwordSecret" -}} +{{- print .Release.Name "-clickhouse" -}} +{{- end -}} + +{{/* +Return the clickhouse host (renders nothing when global.clickhouse.host is empty) +*/}} +{{- define "wandb.clickhouse.host" -}} +{{- if eq .Values.global.clickhouse.host "" -}} +{{- else -}} +{{ .Values.global.clickhouse.host }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/operator-wandb/templates/_kafka.tpl b/charts/operator-wandb/templates/_kafka.tpl index 5c81299d..d53f45e6 100644 --- a/charts/operator-wandb/templates/_kafka.tpl +++ b/charts/operator-wandb/templates/_kafka.tpl @@ -59,3 +59,10 @@ Return the kafka topic name for 
run-updates-shadow {{- define "wandb.kafka.runUpdatesShadowTopic" -}} {{ printf "%s-%s" .Release.Name "run-updates-shadow" | trunc 63 | trimSuffix "-" }} {{- end -}} + +{{/* +Return the number of partitions for run-updates-shadow +*/}} +{{- define "wandb.kafka.runUpdatesShadowNumPartitions" -}} +{{- print .Values.global.kafka.runUpdatesShadowNumPartitions -}} +{{- end -}} diff --git a/charts/operator-wandb/templates/clickhouse.yaml b/charts/operator-wandb/templates/clickhouse.yaml new file mode 100644 index 00000000..d4f4a391 --- /dev/null +++ b/charts/operator-wandb/templates/clickhouse.yaml @@ -0,0 +1,10 @@ +--- +{{- $secretName := (include "wandb.clickhouse.passwordSecret" .) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} +data: + CLICKHOUSE_PASSWORD: {{ include "wandb.clickhouse.password" . | b64enc }} \ No newline at end of file diff --git a/charts/operator-wandb/templates/ingress.yaml b/charts/operator-wandb/templates/ingress.yaml index 44a184c2..fb23fca3 100644 --- a/charts/operator-wandb/templates/ingress.yaml +++ b/charts/operator-wandb/templates/ingress.yaml @@ -81,6 +81,15 @@ spec: http: paths: {{- include "IngressPath" $dot | nindent 6 }} + {{- if index $.Values "weave-trace" "install" }} + - pathType: Prefix + path: /traces + backend: + service: + name: {{ $.Release.Name }}-weave-trace + port: + number: 8722 + {{- end }} {{- end }} {{- end }} {{- end }} diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index a5687855..9b9a54a2 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -14,6 +14,8 @@ global: host: "http://localhost:8080" license: "" + cloudProvider: "" + storageClass: "" banners: @@ -61,6 +63,14 @@ global: secret: "" clientId: "" + clickhouse: + install: false + host: "" + port: 8443 + password: "fake" + database: "weave_trace_db" + user: "default" + email: smtp: host: "" @@ -101,6 +111,12 
@@ global: brokerHost: "" brokerPort: 9092 runUpdatesShadowTopic: "" + # This value will only apply upon initial topic creation. + # If the topic already exists then changing the number of partitions is not possible. + runUpdatesShadowNumPartitions: 1 + + weave-trace: + enabled: false ingress: install: true @@ -154,6 +170,14 @@ weave: repository: wandb/local tag: latest +weave-trace: + install: false + image: + repository: wandb/weave-trace + tag: latest + datadog: + enabled: false + console: install: true image: @@ -171,6 +195,15 @@ mysql: persistence: size: 20Gi storageClass: "" +yace: + install: false + regions: ["ap-south-1"] + pod: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5000" + prometheus.io/path: "/metrics" + prometheus.io/scheme: http redis: install: true @@ -199,7 +232,7 @@ prometheus: install: true stackdriver: - install: true + install: false pod: annotations: prometheus.io/scrape: "true"