diff --git a/charts/cp-ksql-server/.helmignore b/charts/cp-ksql-server/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/charts/cp-ksql-server/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/cp-ksql-server/Chart.yaml b/charts/cp-ksql-server/Chart.yaml new file mode 100644 index 00000000..12e8eb8d --- /dev/null +++ b/charts/cp-ksql-server/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for Confluent KSQL Server on Kubernetes +name: cp-ksql-server +version: 0.1.0 diff --git a/charts/cp-ksql-server/README.md b/charts/cp-ksql-server/README.md new file mode 100644 index 00000000..4a6a4039 --- /dev/null +++ b/charts/cp-ksql-server/README.md @@ -0,0 +1,158 @@ +# KSQL Server Helm Chart + +This chart bootstraps a deployment of a Confluent KSQL Server. + +This is an example deployment which runs KSQL Server in non-interactive +mode. +The included queries file `queries.sql` is a stub provided to illustrate one possible approach to mounting queries in the server container via ConfigMap. 
+ +## Prerequisites + +* Kubernetes 1.9.2+ +* Helm 2.8.2+ +* A healthy and accessible Kafka Cluster + +## Developing Environment + +* [Pivotal Container Service (PKS)](https://pivotal.io/platform/pivotal-container-service) +* [Google Kubernetes Engine (GKE)](https://cloud.google.com/kubernetes-engine/) + +## Docker Image Source + +* [DockerHub -> ConfluentInc](https://hub.docker.com/u/confluentinc/) + +## Installing the Chart + +### Install along with cp-helm-charts + +```console +git clone https://github.com/confluentinc/cp-helm-charts.git +helm install cp-helm-charts +``` + +To install with a specific name, you can do: + +```console +helm install --name my-confluent cp-helm-charts +``` + +### Install with an existing cp-kafka and cp-schema-registry release + +```console +helm install --set kafka.bootstrapServers="PLAINTEXT://unhinged-robin-cp-kafka-headless:9092",cp-schema-registry.url="lolling-chinchilla-cp-schema-registry:8081" cp-helm-charts/charts/cp-ksql-server +``` + +### Installed Components + +You can use `helm status <release name>` to view all of the installed components. + +For example: + +```console{%raw} +$ helm status excited-lynx +STATUS: DEPLOYED + +RESOURCES: +==> v1/Service +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +excited-lynx-cp-ksql-server ClusterIP 10.31.253.70 <none> 8088/TCP 10s + +==> v1beta2/Deployment +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +excited-lynx-cp-ksql-server 1 1 1 0 10s + +==> v1/Pod(related) +NAME READY STATUS RESTARTS AGE +excited-lynx-cp-ksql-server-d4848ff94-x5fmn 2/2 Running 1 10s + +==> v1/ConfigMap +NAME DATA AGE +excited-lynx-cp-ksql-server-jmx-configmap 1 10s +excited-lynx-cp-ksql-server-ksql-queries-configmap 1 10s + + +NOTES: +This chart installs Confluent KSQL Server. + +https://docs.confluent.io/current/ksql/docs +``` + +There are +1. 
A [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) `excited-lynx-cp-ksql-server` which contains 1 KSQL Server instance [Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/): `excited-lynx-cp-ksql-server-d4848ff94-x5fmn`. +1. A [Service](https://kubernetes.io/docs/concepts/services-networking/service/) `excited-lynx-cp-ksql-server` for clients to connect to KSQL Server. +1. A [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/) which contains configuration for Prometheus JMX Exporter. +1. A [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/) which contains SQL queries for the server to run in non-interactive mode. + +## Configuration + +You can specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. + +Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example, + +```console +helm install --name my-ksql-server -f my-values.yaml ./cp-ksql-server +``` + +> **Tip**: A default [values.yaml](values.yaml) is provided + +### KSQL Server Deployment + +The configuration parameters in this section control the resources requested and utilized by the cp-ksql-server chart. + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `replicaCount` | The number of KSQL Server instances. | `1` | + +### Image + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `image` | Docker Image of Confluent KSQL Server. | `confluentinc/cp-ksql-server` | +| `imageTag` | Docker Image Tag of Confluent KSQL Server. | `5.0.0` | +| `imagePullPolicy` | Docker Image Pull Policy of Confluent KSQL Server. | `IfNotPresent` | + +### Port + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `servicePort` | The port on which the KSQL Server will be available and serving requests. 
| `8088` | + +### Resources + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `resources.requests.cpu` | The amount of CPU to request. | see [values.yaml](values.yaml) for details | +| `resources.requests.memory` | The amount of memory to request. | see [values.yaml](values.yaml) for details | +| `resources.limits.cpu` | The upper limit CPU usage for a KSQL Server Pod. | see [values.yaml](values.yaml) for details | +| `resources.limits.memory` | The upper limit memory usage for a KSQL Server Pod. | see [values.yaml](values.yaml) for details | + +### JMX Configuration + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `jmx.port` | The JMX port on which JMX style metrics are exposed. | `5555` | + +### Prometheus JMX Exporter Configuration + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `prometheus.jmx.enabled` | Whether or not to install Prometheus JMX Exporter as a sidecar container and expose JMX metrics to Prometheus. | `true` | +| `prometheus.jmx.image` | Docker Image for Prometheus JMX Exporter container. | `solsson/kafka-prometheus-jmx-exporter@sha256` | +| `prometheus.jmx.imageTag` | Docker Image Tag for Prometheus JMX Exporter container. | `a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8` | +| `prometheus.jmx.port` | JMX Exporter Port which exposes metrics in Prometheus format for scraping. | `5556` | + +### External Access + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `external.enabled` | Whether or not to allow external access to KSQL Server | `false` | +| `external.type` | `Kubernetes Service Type` used to expose KSQL Server externally | `LoadBalancer` | + +## Dependencies + +### Schema Registry (optional) + +| Parameter | Description | Default | +| --------- | ----------- | ------- | +| `cp-schema-registry.url` | Service name of Schema Registry (Not needed if this is installed along with cp-kafka chart). 
| `""` | +| `cp-schema-registry.port` | Port of Schema Registry Service | `8081` | diff --git a/charts/cp-ksql-server/queries.sql b/charts/cp-ksql-server/queries.sql new file mode 100644 index 00000000..8014efe8 --- /dev/null +++ b/charts/cp-ksql-server/queries.sql @@ -0,0 +1,19 @@ +-- From http://docs.confluent.io/current/ksql/docs/tutorials/basics-docker.html#create-a-stream-and-table + +-- Create a stream pageviews_original from the Kafka topic pageviews, specifying the value_format of DELIMITED +CREATE STREAM pageviews_original (viewtime bigint, userid varchar, pageid varchar) WITH (kafka_topic='pageviews', value_format='DELIMITED'); + +-- Create a table users_original from the Kafka topic users, specifying the value_format of JSON +CREATE TABLE users_original (registertime BIGINT, gender VARCHAR, regionid VARCHAR, userid VARCHAR) WITH (kafka_topic='users', value_format='JSON', key = 'userid'); + +-- Create a persistent query by using the CREATE STREAM keywords to precede the SELECT statement +CREATE STREAM pageviews_enriched AS SELECT users_original.userid AS userid, pageid, regionid, gender FROM pageviews_original LEFT JOIN users_original ON pageviews_original.userid = users_original.userid; + +-- Create a new persistent query where a condition limits the streams content, using WHERE +CREATE STREAM pageviews_female AS SELECT * FROM pageviews_enriched WHERE gender = 'FEMALE'; + +-- Create a new persistent query where another condition is met, using LIKE +CREATE STREAM pageviews_female_like_89 WITH (kafka_topic='pageviews_enriched_r8_r9') AS SELECT * FROM pageviews_female WHERE regionid LIKE '%_8' OR regionid LIKE '%_9'; + +-- Create a new persistent query that counts the pageviews for each region and gender combination in a tumbling window of 30 seconds when the count is greater than one +CREATE TABLE pageviews_regions WITH (VALUE_FORMAT='avro') AS SELECT gender, regionid , COUNT(*) AS numusers FROM pageviews_enriched WINDOW TUMBLING (size 30 second) GROUP BY 
gender, regionid HAVING COUNT(*) > 1; \ No newline at end of file diff --git a/charts/cp-ksql-server/templates/NOTES.txt b/charts/cp-ksql-server/templates/NOTES.txt new file mode 100644 index 00000000..14872ebd --- /dev/null +++ b/charts/cp-ksql-server/templates/NOTES.txt @@ -0,0 +1,3 @@ +This chart installs Confluent KSQL Server. + +https://docs.confluent.io/current/ksql/docs diff --git a/charts/cp-ksql-server/templates/_helpers.tpl b/charts/cp-ksql-server/templates/_helpers.tpl new file mode 100644 index 00000000..0107fbe5 --- /dev/null +++ b/charts/cp-ksql-server/templates/_helpers.tpl @@ -0,0 +1,64 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "cp-ksql-server.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cp-ksql-server.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cp-ksql-server.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified kafka headless name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "cp-ksql-server.cp-kafka-headless.fullname" -}} +{{- $name := "cp-kafka-headless" -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Form the Kafka URL. If Kafka is installed as part of this chart, use k8s service discovery, +else use user-provided URL +*/}} +{{- define "cp-ksql-server.kafka.bootstrapServers" -}} +{{- if .Values.kafka.bootstrapServers -}} +{{- .Values.kafka.bootstrapServers -}} +{{- else -}} +{{- printf "PLAINTEXT://%s:9092" (include "cp-ksql-server.cp-kafka-headless.fullname" .) -}} +{{- end -}} +{{- end -}} + +{{/* +Default Server Pool Id to Release Name but allow it to be overridden +*/}} +{{- define "cp-ksql-server.serviceId" -}} +{{- if .Values.overrideServiceId -}} +{{- .Values.overrideServiceId -}} +{{- else -}} +{{- .Release.Name -}} +{{- end -}} +{{- end -}} diff --git a/charts/cp-ksql-server/templates/deployment.yaml b/charts/cp-ksql-server/templates/deployment.yaml new file mode 100644 index 00000000..59f341d6 --- /dev/null +++ b/charts/cp-ksql-server/templates/deployment.yaml @@ -0,0 +1,93 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: {{ template "cp-ksql-server.fullname" . }} + labels: + app: {{ template "cp-ksql-server.name" . }} + chart: {{ template "cp-ksql-server.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "cp-ksql-server.name" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "cp-ksql-server.name" . 
}} + release: {{ .Release.Name }} + {{- if .Values.prometheus.jmx.enabled }} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: {{ .Values.prometheus.jmx.port | quote }} + {{- end }} + spec: + containers: + {{- if .Values.prometheus.jmx.enabled }} + - name: prometheus-jmx-exporter + image: "{{ .Values.prometheus.jmx.image }}:{{ .Values.prometheus.jmx.imageTag }}" + command: + - java + - -XX:+UnlockExperimentalVMOptions + - -XX:+UseCGroupMemoryLimitForHeap + - -XX:MaxRAMFraction=1 + - -XshowSettings:vm + - -jar + - jmx_prometheus_httpserver.jar + - {{ .Values.prometheus.jmx.port | quote }} + - /etc/jmx-ksql-server/jmx-ksql-server-prometheus.yml + ports: + - containerPort: {{ .Values.prometheus.jmx.port }} + resources: +{{ toYaml .Values.prometheus.jmx.resources | indent 12 }} + volumeMounts: + - name: jmx-config + mountPath: /etc/jmx-ksql-server + {{- end }} + - name: {{ template "cp-ksql-server.name" . }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + ports: + - name: server + containerPort: {{ .Values.servicePort}} + protocol: TCP + {{- if .Values.prometheus.jmx.enabled }} + - containerPort: {{ .Values.jmx.port }} + name: jmx + {{- end }} + resources: +{{ toYaml .Values.resources | indent 12 }} + volumeMounts: + {{- if .Values.ksql.headless }} + - name: ksql-queries + mountPath: /etc/ksql/queries + {{- end }} + env: + - name: KSQL_BOOTSTRAP_SERVERS + value: {{ template "cp-ksql-server.kafka.bootstrapServers" . }} + - name: KSQL_KSQL_SERVICE_ID + value: {{ template "cp-ksql-server.serviceId" . 
}} + {{- if .Values.ksql.headless }} + - name: KSQL_KSQL_QUERIES_FILE + value: /etc/ksql/queries/queries.sql + {{- else }} + - name: KSQL_LISTENERS + value: "http://0.0.0.0:{{ .Values.servicePort }}" # bind to servicePort so the listener matches the containerPort and the Service port + {{- end }} + {{- if .Values.jmx.port }} + - name: KSQL_JMX_PORT + value: "{{ .Values.jmx.port }}" + {{- end }} + volumes: + {{- if .Values.prometheus.jmx.enabled }} + - name: jmx-config + configMap: + name: {{ template "cp-ksql-server.fullname" . }}-jmx-configmap + {{- end }} + {{- if .Values.ksql.headless }} + - name: ksql-queries + configMap: + name: {{ template "cp-ksql-server.fullname" . }}-ksql-queries-configmap + {{- end }} diff --git a/charts/cp-ksql-server/templates/external-service.yaml b/charts/cp-ksql-server/templates/external-service.yaml new file mode 100644 index 00000000..3b78a3c5 --- /dev/null +++ b/charts/cp-ksql-server/templates/external-service.yaml @@ -0,0 +1,19 @@ +{{- if .Values.external.enabled -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "cp-ksql-server.fullname" . }}-external + labels: + app: {{ template "cp-ksql-server.name" . }} + chart: {{ template "cp-ksql-server.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: {{ .Values.external.type }} + ports: + - name: server-proxy + port: {{ .Values.servicePort }} + selector: + app: {{ template "cp-ksql-server.name" . }} + release: {{ .Release.Name }} +{{- end -}} diff --git a/charts/cp-ksql-server/templates/jmx-configmap.yaml b/charts/cp-ksql-server/templates/jmx-configmap.yaml new file mode 100644 index 00000000..582194fd --- /dev/null +++ b/charts/cp-ksql-server/templates/jmx-configmap.yaml @@ -0,0 +1,20 @@ +{{- if .Values.prometheus.jmx.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "cp-ksql-server.fullname" . }}-jmx-configmap + labels: + app: {{ template "cp-ksql-server.name" . }} + chart: {{ template "cp-ksql-server.chart" . 
}} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + jmx-ksql-server-prometheus.yml: |+ + jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:{{ .Values.jmx.port }}/jmxrmi + lowercaseOutputName: true + lowercaseOutputLabelNames: true + ssl: false + rules: + - pattern : 'io.confluent.ksql.metrics([^:]+):' + name: "cp_ksql_server_metrics_$1" +{{- end }} diff --git a/charts/cp-ksql-server/templates/ksql-queries-configmap.yaml b/charts/cp-ksql-server/templates/ksql-queries-configmap.yaml new file mode 100644 index 00000000..ff4c6867 --- /dev/null +++ b/charts/cp-ksql-server/templates/ksql-queries-configmap.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "cp-ksql-server.fullname" . }}-ksql-queries-configmap + labels: + app: {{ template "cp-ksql-server.name" . }} + chart: {{ template "cp-ksql-server.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + {{- $files := .Files }} + {{- range tuple "queries.sql" }} + {{ . }}: |- +{{ $files.Get . | indent 4 }} + {{- end }} diff --git a/charts/cp-ksql-server/templates/service.yaml b/charts/cp-ksql-server/templates/service.yaml new file mode 100644 index 00000000..fa0837cf --- /dev/null +++ b/charts/cp-ksql-server/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "cp-ksql-server.fullname" . }} + labels: + app: {{ template "cp-ksql-server.name" . }} + chart: {{ template "cp-ksql-server.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + ports: + - name: ksql-server + port: {{ .Values.servicePort }} + selector: + app: {{ template "cp-ksql-server.name" . }} + release: {{ .Release.Name }} diff --git a/charts/cp-ksql-server/values.yaml b/charts/cp-ksql-server/values.yaml new file mode 100644 index 00000000..becf142e --- /dev/null +++ b/charts/cp-ksql-server/values.yaml @@ -0,0 +1,65 @@ +# Default values for cp-ksql-server. +# This is a YAML-formatted file. 
+# Declare variables to be passed into your templates. + +replicaCount: 1 + +## Image Info +## ref: https://hub.docker.com/r/confluentinc/cp-ksql-server/ +image: confluentinc/cp-ksql-server +imageTag: 5.0.0 + +## Specify an imagePullPolicy +## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images +imagePullPolicy: IfNotPresent + +servicePort: 8088 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +## Monitoring +## JMX Settings +## ref: https://docs.confluent.io/current/ksql/docs/operations.html +jmx: + port: 5555 + +## Prometheus Exporter Configuration +## ref: https://prometheus.io/docs/instrumenting/exporters/ +prometheus: + ## JMX Exporter Configuration + ## ref: https://github.com/prometheus/jmx_exporter + jmx: + enabled: true + image: solsson/kafka-prometheus-jmx-exporter@sha256 + imageTag: a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8 + port: 5556 + +## External Access +## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-loadbalancer +external: + enabled: false + type: LoadBalancer + +## Headless (non-interactive) mode: when true, the queries ConfigMap is mounted and KSQL_KSQL_QUERIES_FILE is set; when false, KSQL_LISTENERS is set instead +## ref: https://docs.confluent.io/current/ksql/docs/installation/server-config/index.html +ksql: + headless: true + +## Kafka bootstrap servers: a load-balanced service endpoint, or a list of all brokers (which is hard in K8s). When left empty the chart uses PLAINTEXT://{release name}-cp-kafka-headless:9092. e.g.: +## bootstrapServers: "PLAINTEXT://dozing-prawn-kafka-headless:9092" +kafka: + bootstrapServers: "" + +## e.g. 
gnoble-panther-cp-schema-registry:8081 +cp-schema-registry: + url: "" diff --git a/examples/ksql-demo.yaml b/examples/ksql-demo.yaml index 7c25f299..29a7f847 100644 --- a/examples/ksql-demo.yaml +++ b/examples/ksql-demo.yaml @@ -8,7 +8,7 @@ # $ kubectl apply -f examples/ksql-demo.yaml # # Run KSQL CLI: -# $ kubectl exec -it ksql-demo --container ksql -- /bin/bash ksql +# $ kubectl exec -it ksql-demo --container ksql -- /bin/bash ksql # ksql> list topics ; # ksql> print 'pageviews'; # @@ -23,23 +23,20 @@ metadata: spec: containers: - name: ksql-datagen-pageviews - image: confluentinc/ksql-examples:5.0.0-beta1 + image: confluentinc/ksql-examples:5.0.0 command: - sh - -c - - "exec java -jar /usr/share/java/ksql-examples/ksql-examples-5.0.0-SNAPSHOT-standalone.jar quickstart=pageviews format=delimited topic=pageviews bootstrap-server=my-confluent-oss-cp-kafka:9092" + - "exec ksql-datagen quickstart=pageviews format=delimited topic=pageviews bootstrap-server=my-confluent-oss-cp-kafka:9092" - name: ksql-datagen-users - image: confluentinc/ksql-examples:5.0.0-beta1 + image: confluentinc/ksql-examples:5.0.0 command: - sh - -c - - "exec java -jar /usr/share/java/ksql-examples/ksql-examples-5.0.0-SNAPSHOT-standalone.jar quickstart=users format=json topic=users iterations=1000 bootstrap-server=my-confluent-oss-cp-kafka:9092" + - "ksql-datagen quickstart=users format=json topic=users iterations=1000 bootstrap-server=my-confluent-oss-cp-kafka:9092" - name: ksql - image: confluentinc/ksql-cli:5.0.0-beta1 - env: - - name: KSQL_OPTS - value: "-Dbootstrap.servers=my-confluent-oss-cp-kafka:9092" + image: confluentinc/ksql-cli:5.0.0 command: - - sh - - -c - - "ksql-server-start /etc/ksql/ksql-server.properties" + - sh + - -c + - "exec tail -f /dev/null" diff --git a/grafana-dashboard/confluent-open-source-grafana-dashboard.json b/grafana-dashboard/confluent-open-source-grafana-dashboard.json index 4d3882fd..3d4dfbac 100644 --- 
a/grafana-dashboard/confluent-open-source-grafana-dashboard.json +++ b/grafana-dashboard/confluent-open-source-grafana-dashboard.json @@ -14,7 +14,13 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.1.2" + "version": "5.1.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" }, { "type": "datasource", @@ -40,7 +46,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1527775913244, + "iteration": 1529267754059, "links": [], "panels": [ { @@ -1347,19 +1353,18 @@ "x": 0, "y": 1 }, - "id": 26, + "id": 97, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ - "#d44a3a", + "#299c46", "rgba(237, 129, 40, 0.89)", - "#299c46" + "#d44a3a" ], "datasource": null, - "description": "Quorum Size of Zookeeper ensemble", "format": "none", "gauge": { "maxValue": 100, @@ -1372,9 +1377,9 @@ "h": 4, "w": 4, "x": 0, - "y": 3 + "y": 2 }, - "id": 52, + "id": 99, "interval": null, "links": [], "mappingType": 1, @@ -1411,14 +1416,14 @@ "tableColumn": "", "targets": [ { - "expr": "avg(cp_zookeeper_quorumsize{release=\"$Release\"})", + "expr": "cp_kafka_connect_connect_worker_metrics_connector_count", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "2,3", - "title": "Quorum Size", + "thresholds": "1000000000", + "title": "Number of Connectors", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -1428,7 +1433,7 @@ "value": "null" } ], - "valueName": "current" + "valueName": "avg" }, { "cacheTimeout": null, @@ -1440,7 +1445,7 @@ "#d44a3a" ], "datasource": null, - "description": "Number of Alive Connections", + "description": "The total number of connector starts that succeeded.", "format": "none", "gauge": { "maxValue": 100, @@ -1453,9 +1458,9 @@ "h": 4, "w": 4, "x": 4, - "y": 3 + "y": 2 }, - "id": 54, + "id": 103, "interval": null, "links": [], "mappingType": 1, @@ -1492,14 +1497,14 @@ "tableColumn": "", "targets": [ { - "expr": 
"sum(cp_zookeeper_numaliveconnections{release=\"$Release\"})", + "expr": "cp_kafka_connect_connect_worker_metrics_connector_startup_success_total", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "60,120", - "title": "Alive Connections", + "thresholds": "100000000", + "title": "Connector Startup Success Total", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -1509,134 +1514,88 @@ "value": "null" } ], - "valueName": "current" + "valueName": "avg" }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 10 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Outstanding Requests alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": null, - "description": "Number of queued requests in the server. 
This goes up when the server receives more requests than it can process", - "fill": 1, + "description": "The total number of connector starts that failed.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { - "h": 8, - "w": 16, + "h": 4, + "w": 4, "x": 8, - "y": 3 - }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "y": 2 }, - "lines": true, - "linewidth": 1, + "id": 104, + "interval": null, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + "mappingType": 1, + "mappingTypes": [ { - "expr": "cp_zookeeper_outstandingrequests{release=\"$Release\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", - "refId": "A" + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 } ], - "thresholds": [ + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 10 + "from": "null", + "text": "N/A", + "to": "null" } ], - "timeFrom": null, - "timeShift": null, - "title": "Outstanding Requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - "yaxes": [ + "tableColumn": "", + "targets": [ { - "format": "short", - "label": null, 
- "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "expr": "cp_kafka_connect_connect_worker_metrics_connector_startup_failure_total", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "1,1", + "title": "Connector Startup Failure Total", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "valueName": "avg" }, { "cacheTimeout": null, @@ -1659,10 +1618,10 @@ "gridPos": { "h": 4, "w": 4, - "x": 0, - "y": 7 + "x": 12, + "y": 2 }, - "id": 64, + "id": 101, "interval": null, "links": [], "mappingType": 1, @@ -1699,14 +1658,14 @@ "tableColumn": "", "targets": [ { - "expr": "avg(cp_zookeeper_inmemorydatatree_nodecount{release=\"$Release\"})", + "expr": "cp_kafka_connect_connect_worker_metrics_task_count", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "500,800", - "title": "Number of ZNodes", + "thresholds": "10000000000", + "title": "Number of Tasks", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -1716,7 +1675,7 @@ "value": "null" } ], - "valueName": "current" + "valueName": "avg" }, { "cacheTimeout": null, @@ -1728,7 +1687,7 @@ "#d44a3a" ], "datasource": null, - "description": "Number of Watchers", + "description": "The total number of task starts that succeeded.", "format": "none", "gauge": { "maxValue": 100, @@ -1740,10 +1699,10 @@ "gridPos": { "h": 4, "w": 4, - "x": 4, - "y": 7 + "x": 16, + "y": 2 }, - "id": 66, + "id": 105, "interval": null, "links": [], "mappingType": 1, @@ -1780,14 +1739,14 @@ "tableColumn": "", "targets": [ { - "expr": "sum(cp_zookeeper_inmemorydatatree_watchcount{release=\"$Release\"})", + "expr": "cp_kafka_connect_connect_worker_metrics_task_startup_success_total", "format": "time_series", "intervalFactor": 1, 
"refId": "A" } ], - "thresholds": "100,200", - "title": "Number of Watchers", + "thresholds": "10000000000", + "title": "Task Startup Success Total", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -1797,92 +1756,88 @@ "value": "null" } ], - "valueName": "current" + "valueName": "avg" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": null, - "description": "Aggregated Kafka Broker Pods CPU Usage", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 11 + "description": "The total number of task starts that failed.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - "id": 85, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 2 }, - "lines": true, - "linewidth": 1, + "id": 106, + "interval": null, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "sum(rate 
(container_cpu_usage_seconds_total{namespace=\"default\",pod_name=~\"$Release-cp-zookeeper-(\\\\d+)\"}[5m])) by (pod_name)", + "expr": "cp_kafka_connect_connect_worker_metrics_task_startup_failure_total", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{pod_name}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "thresholds": "1,1", + "title": "Task Startup Failure Total", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "valueName": "avg" }, { "aliasColors": {}, @@ -1890,15 +1845,14 @@ "dashLength": 10, "dashes": false, "datasource": null, - "description": "Kafka Broker Pods Memory Usage", "fill": 1, "gridPos": { "h": 7, "w": 8, - "x": 8, - "y": 11 + "x": 0, + "y": 6 }, - "id": 87, + "id": 93, "legend": { "avg": false, "current": false, @@ -1922,17 +1876,16 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{namespace=\"default\",pod_name=~\"$Release-cp-zookeeper-(\\\\d+)\"}) by (pod_name)", + "expr": "cp_kafka_connect_connect_metrics_connect_1_io_ratio", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{pod_name}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Connect Metrics: IO Rate", "tooltip": { "shared": true, "sort": 0, @@ -1948,7 +1901,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, 
"max": null, @@ -1975,15 +1928,14 @@ "dashLength": 10, "dashes": false, "datasource": null, - "description": "Kafka Broker Pods Disk Usage", "fill": 1, "gridPos": { "h": 7, "w": 8, - "x": 16, - "y": 11 + "x": 8, + "y": 6 }, - "id": 89, + "id": 91, "legend": { "avg": false, "current": false, @@ -2007,17 +1959,17 @@ "steppedLine": false, "targets": [ { - "expr": "kubelet_volume_stats_used_bytes{namespace=\"default\",persistentvolumeclaim=~\"datadir-$Release-cp-zookeeper.*\"}", + "expr": "cp_kafka_connect_connect_metrics_connect_1_incoming_byte_rate", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{persistentvolumeclaim}}", + "legendFormat": "{{kubernetes_pod_name}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Disk Usage", + "title": "Connect Metrics: Incoming Byte Rate", "tooltip": { "shared": true, "sort": 0, @@ -2033,7 +1985,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -2060,15 +2012,14 @@ "dashLength": 10, "dashes": false, "datasource": null, - "description": "Amount of time it takes for the server to respond to a client request", "fill": 1, "gridPos": { - "h": 8, + "h": 7, "w": 8, - "x": 0, - "y": 18 + "x": 16, + "y": 6 }, - "id": 10, + "id": 95, "legend": { "avg": false, "current": false, @@ -2092,7 +2043,7 @@ "steppedLine": false, "targets": [ { - "expr": "cp_zookeeper_minrequestlatency{release=\"$Release\"}", + "expr": "cp_kafka_connect_connect_metrics_connect_1_network_io_rate", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}}", @@ -2102,7 +2053,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Minimum", + "title": "Connect Metrics: Network IO Rate", "tooltip": { "shared": true, "sort": 0, @@ -2138,300 +2089,48 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Confluent Kafka Connect", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { 
+ "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 77, + "panels": [ { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 10 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Request Latency - Average alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], "datasource": null, - "description": "Amount of time it takes for the server to respond to a client request", - "fill": 1, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 18 + "description": "Number of Kafka REST servers", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "gridPos": { + "h": 4, + "w": 5, + "x": 0, + "y": 3 }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "cp_zookeeper_avgrequestlatency{release=\"$Release\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 10 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency - Average", - "tooltip": { - "shared": 
true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 20 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Request Latency - Maximum alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Amount of time it takes for the server to respond to a client request", - "fill": 1, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "cp_zookeeper_maxrequestlatency{release=\"$Release\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 20 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency - Maximum", - "tooltip": { - 
"shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Confluent Zookeeper", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "id": 79, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": null, - "description": "Number of Kafka Schema Registry servers", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 5, - "x": 0, - "y": 4 - }, - "id": 72, - "interval": null, + "id": 68, + "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ @@ -2467,14 +2166,14 @@ "tableColumn": "", "targets": [ { - "expr": "count(cp_kafka_schema_registry_jetty_metrics_connections_active{release=\"$Release\"})", + "expr": "count(cp_kafka_rest_jetty_metrics_connections_active{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": "1,1", - "title": "Number of Kafka Schema Registry servers", + "title": "Number of Kafka REST servers", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -2509,9 +2208,9 @@ "h": 4, "w": 5, "x": 5, - "y": 4 + "y": 3 }, - "id": 73, + "id": 71, "interval": null, "links": [], "mappingType": 1, @@ -2548,7 +2247,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_schema_registry_jetty_metrics_connections_active{release=\"$Release\"}", + "expr": 
"cp_kafka_rest_jetty_metrics_connections_active{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2590,9 +2289,9 @@ "h": 4, "w": 5, "x": 10, - "y": 4 + "y": 3 }, - "id": 74, + "id": 69, "interval": null, "links": [], "mappingType": 1, @@ -2629,7 +2328,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_schema_registry_jetty_metrics_connections_opened_rate{release=\"$Release\"}", + "expr": "cp_kafka_rest_jetty_metrics_connections_opened_rate{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2671,9 +2370,9 @@ "h": 4, "w": 5, "x": 15, - "y": 4 + "y": 3 }, - "id": 75, + "id": 70, "interval": null, "links": [], "mappingType": 1, @@ -2710,7 +2409,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_schema_registry_jetty_metrics_connections_closed_rate{release=\"$Release\"}", + "expr": "cp_kafka_rest_jetty_metrics_connections_closed_rate{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2730,18 +2429,538 @@ "valueName": "current" } ], - "title": "Confluent Schema Registry", + "title": "Confluent Kafka REST", "type": "row" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 3 }, - "id": 77, + "id": 110, + "panels": [], + "title": "Confluent KSQL Server", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of bytes consumed.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 108, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_bytes_consumed_total", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Bytes Consumed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Messages consumed per second.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 114, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_messages_consumed_per_sec", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Messages Consumed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Messages produced per second.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 116, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_messages_produced_per_sec", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Messages Produced", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active queries.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 118, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_num_active_queries", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Active Queries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of idle queries.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 119, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_num_idle_queries", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Idle Queries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of persistent queries.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 120, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cp_ksql_server_metrics_num_persistent_queries", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "KSQL Metrics: Persistent Queries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 79, "panels": [ { "cacheTimeout": null, @@ -2753,7 +2972,7 @@ "#299c46" ], "datasource": null, - "description": "Number of 
Kafka REST servers", + "description": "Number of Kafka Schema Registry servers", "format": "none", "gauge": { "maxValue": 100, @@ -2766,9 +2985,9 @@ "h": 4, "w": 5, "x": 0, - "y": 5 + "y": 4 }, - "id": 68, + "id": 72, "interval": null, "links": [], "mappingType": 1, @@ -2805,14 +3024,14 @@ "tableColumn": "", "targets": [ { - "expr": "count(cp_kafka_rest_jetty_metrics_connections_active{release=\"$Release\"})", + "expr": "count(cp_kafka_schema_registry_jetty_metrics_connections_active{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": "1,1", - "title": "Number of Kafka REST servers", + "title": "Number of Kafka Schema Registry servers", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -2847,9 +3066,9 @@ "h": 4, "w": 5, "x": 5, - "y": 5 + "y": 4 }, - "id": 71, + "id": 73, "interval": null, "links": [], "mappingType": 1, @@ -2886,7 +3105,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_rest_jetty_metrics_connections_active{release=\"$Release\"}", + "expr": "cp_kafka_schema_registry_jetty_metrics_connections_active{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2928,9 +3147,9 @@ "h": 4, "w": 5, "x": 10, - "y": 5 + "y": 4 }, - "id": 69, + "id": 74, "interval": null, "links": [], "mappingType": 1, @@ -2967,7 +3186,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_rest_jetty_metrics_connections_opened_rate{release=\"$Release\"}", + "expr": "cp_kafka_schema_registry_jetty_metrics_connections_opened_rate{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3009,9 +3228,9 @@ "h": 4, "w": 5, "x": 15, - "y": 5 + "y": 4 }, - "id": 70, + "id": 75, "interval": null, "links": [], "mappingType": 1, @@ -3048,7 +3267,7 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_rest_jetty_metrics_connections_closed_rate{release=\"$Release\"}", + "expr": 
"cp_kafka_schema_registry_jetty_metrics_connections_closed_rate{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3068,7 +3287,7 @@ "valueName": "current" } ], - "title": "Confluent Kafka REST", + "title": "Confluent Schema Registry", "type": "row" }, { @@ -3077,20 +3296,21 @@ "h": 1, "w": 24, "x": 0, - "y": 4 + "y": 17 }, - "id": 97, + "id": 26, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ - "#299c46", + "#d44a3a", "rgba(237, 129, 40, 0.89)", - "#d44a3a" + "#299c46" ], "datasource": null, + "description": "Quorum Size of Zookeeper ensemble", "format": "none", "gauge": { "maxValue": 100, @@ -3103,9 +3323,9 @@ "h": 4, "w": 4, "x": 0, - "y": 1 + "y": 3 }, - "id": 99, + "id": 52, "interval": null, "links": [], "mappingType": 1, @@ -3142,14 +3362,14 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_connector_count", + "expr": "avg(cp_zookeeper_quorumsize{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "1000000000", - "title": "Number of Connectors", + "thresholds": "2,3", + "title": "Quorum Size", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -3159,7 +3379,7 @@ "value": "null" } ], - "valueName": "avg" + "valueName": "current" }, { "cacheTimeout": null, @@ -3171,7 +3391,7 @@ "#d44a3a" ], "datasource": null, - "description": "The total number of connector starts that succeeded.", + "description": "Number of Alive Connections", "format": "none", "gauge": { "maxValue": 100, @@ -3184,9 +3404,9 @@ "h": 4, "w": 4, "x": 4, - "y": 1 + "y": 3 }, - "id": 103, + "id": 54, "interval": null, "links": [], "mappingType": 1, @@ -3223,14 +3443,14 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_connector_startup_success_total", + "expr": "sum(cp_zookeeper_numaliveconnections{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, 
"refId": "A" } ], - "thresholds": "100000000", - "title": "Connector Startup Success Total", + "thresholds": "60,120", + "title": "Alive Connections", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -3240,88 +3460,134 @@ "value": "null" } ], - "valueName": "avg" + "valueName": "current" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "description": "The total number of connector starts that failed.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 10 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Outstanding Requests alert", + "noDataState": "no_data", + "notifications": [] }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Number of queued requests in the server. 
This goes up when the server receives more requests than it can process", + "fill": 1, "gridPos": { - "h": 4, - "w": 4, + "h": 8, + "w": 16, "x": 8, - "y": 1 + "y": 3 }, - "id": 104, - "interval": null, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "range to text", - "value": 2 + "expr": "cp_zookeeper_outstandingrequests{release=\"$Release\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}}", + "refId": "A" } ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "thresholds": [ { - "from": "null", - "text": "N/A", - "to": "null" + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10 } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "timeFrom": null, + "timeShift": null, + "title": "Outstanding Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "tableColumn": "", - "targets": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_connector_startup_failure_total", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "1,1", - "title": "Connector Startup Failure Total", - "type": "singlestat", - 
"valueFontSize": "200%", - "valueMaps": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "valueName": "avg" + "yaxis": { + "align": false, + "alignLevel": null + } }, { "cacheTimeout": null, @@ -3344,10 +3610,10 @@ "gridPos": { "h": 4, "w": 4, - "x": 12, - "y": 1 + "x": 0, + "y": 7 }, - "id": 101, + "id": 64, "interval": null, "links": [], "mappingType": 1, @@ -3384,14 +3650,14 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_task_count", + "expr": "avg(cp_zookeeper_inmemorydatatree_nodecount{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "10000000000", - "title": "Number of Tasks", + "thresholds": "500,800", + "title": "Number of ZNodes", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ -3401,7 +3667,7 @@ "value": "null" } ], - "valueName": "avg" + "valueName": "current" }, { "cacheTimeout": null, @@ -3413,7 +3679,7 @@ "#d44a3a" ], "datasource": null, - "description": "The total number of task starts that succeeded.", + "description": "Number of Watchers", "format": "none", "gauge": { "maxValue": 100, @@ -3425,10 +3691,10 @@ "gridPos": { "h": 4, "w": 4, - "x": 16, - "y": 1 + "x": 4, + "y": 7 }, - "id": 105, + "id": 66, "interval": null, "links": [], "mappingType": 1, @@ -3465,14 +3731,14 @@ "tableColumn": "", "targets": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_task_startup_success_total", + "expr": "sum(cp_zookeeper_inmemorydatatree_watchcount{release=\"$Release\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "10000000000", - "title": "Task Startup Success Total", + "thresholds": "100,200", + "title": "Number of Watchers", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ @@ 
-3482,88 +3748,262 @@ "value": "null" } ], - "valueName": "avg" + "valueName": "current" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": null, - "description": "The total number of task starts that failed.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "description": "Aggregated Kafka Broker Pods CPU Usage", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate (container_cpu_usage_seconds_total{namespace=\"default\",pod_name=~\"$Release-cp-zookeeper-(\\\\d+)\"}[5m])) by (pod_name)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": null, + "description": "Kafka Broker Pods Memory Usage", + "fill": 1, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 1 + "h": 7, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 87, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{namespace=\"default\",pod_name=~\"$Release-cp-zookeeper-(\\\\d+)\"}) by (pod_name)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "id": 106, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Kafka Broker Pods Disk Usage", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 11 }, - "tableColumn": "", + "id": 89, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "cp_kafka_connect_connect_worker_metrics_task_startup_failure_total", + "expr": "kubelet_volume_stats_used_bytes{namespace=\"default\",persistentvolumeclaim=~\"datadir-$Release-cp-zookeeper.*\"}", "format": "time_series", "intervalFactor": 1, + "legendFormat": "{{persistentvolumeclaim}}", "refId": "A" } ], - "thresholds": "1,1", - "title": "Task Startup Failure Total", - "type": "singlestat", - "valueFontSize": "200%", - "valueMaps": [ + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "valueName": "avg" + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -3571,14 +4011,15 @@ "dashLength": 10, "dashes": false, "datasource": null, + "description": "Amount of time it takes for the server to respond to a client request", "fill": 1, "gridPos": { - "h": 7, + "h": 8, "w": 8, "x": 0, - "y": 5 + "y": 18 }, - 
"id": 93, + "id": 10, "legend": { "avg": false, "current": false, @@ -3602,16 +4043,17 @@ "steppedLine": false, "targets": [ { - "expr": "cp_kafka_connect_connect_metrics_connect_1_io_ratio", + "expr": "cp_zookeeper_minrequestlatency{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, + "legendFormat": "{{kubernetes_pod_name}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Connect Metrics: IO Rate", + "title": "Request Latency - Minimum", "tooltip": { "shared": true, "sort": 0, @@ -3649,19 +4091,53 @@ } }, { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 10 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Request Latency - Average alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, + "description": "Amount of time it takes for the server to respond to a client request", "fill": 1, "gridPos": { - "h": 7, + "h": 8, "w": 8, "x": 8, - "y": 5 + "y": 18 }, - "id": 91, + "id": 6, "legend": { "avg": false, "current": false, @@ -3685,17 +4161,25 @@ "steppedLine": false, "targets": [ { - "expr": "cp_kafka_connect_connect_metrics_connect_1_incoming_byte_rate", + "expr": "cp_zookeeper_avgrequestlatency{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}}", "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10 + } + ], "timeFrom": null, "timeShift": null, - "title": "Connect Metrics: Incoming Byte Rate", + "title": "Request Latency - Average", "tooltip": { "shared": true, "sort": 0, @@ -3733,19 +4217,53 @@ } }, { + 
"alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 20 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Request Latency - Maximum alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, + "description": "Amount of time it takes for the server to respond to a client request", "fill": 1, "gridPos": { - "h": 7, + "h": 8, "w": 8, "x": 16, - "y": 5 + "y": 18 }, - "id": 95, + "id": 8, "legend": { "avg": false, "current": false, @@ -3769,17 +4287,25 @@ "steppedLine": false, "targets": [ { - "expr": "cp_kafka_connect_connect_metrics_connect_1_network_io_rate", + "expr": "cp_zookeeper_maxrequestlatency{release=\"$Release\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{kubernetes_pod_name}}", "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 20 + } + ], "timeFrom": null, "timeShift": null, - "title": "Connect Metrics: Network IO Rate", + "title": "Request Latency - Maximum", "tooltip": { "shared": true, "sort": 0, @@ -3817,7 +4343,7 @@ } } ], - "title": "Confluent Kafka Connect", + "title": "Confluent Zookeeper", "type": "row" } ], @@ -3882,5 +4408,5 @@ "timezone": "", "title": "Confluent Open Source", "uid": "AEaSQ97mz", - "version": 4 -} \ No newline at end of file + "version": 3 +} diff --git a/requirements.yaml b/requirements.yaml index 7a575399..e2fa178d 100644 --- a/requirements.yaml +++ b/requirements.yaml @@ -18,4 +18,8 @@ dependencies: - name: cp-kafka-connect version: 0.1.0 repository: file://./charts/cp-kafka-connect - condition: cp-kafka-connect.enabled \ No newline at end of file + condition: 
cp-kafka-connect.enabled +- name: cp-ksql-server + version: 0.1.0 + repository: file://./charts/cp-ksql-server + condition: cp-ksql-server.enabled