diff --git a/appuio/redis/.helmignore b/appuio/redis/.helmignore index f0c13194..04853261 100644 --- a/appuio/redis/.helmignore +++ b/appuio/redis/.helmignore @@ -19,3 +19,5 @@ .project .idea/ *.tmproj + +hack/ diff --git a/appuio/redis/Chart.yaml b/appuio/redis/Chart.yaml index e27128d1..2ec6d0fe 100644 --- a/appuio/redis/Chart.yaml +++ b/appuio/redis/Chart.yaml @@ -24,4 +24,4 @@ name: redis sources: - https://github.com/bitnami/bitnami-docker-redis - http://redis.io/ -version: 1.0.0 +version: 1.1.0 diff --git a/appuio/redis/README.md b/appuio/redis/README.md index 5519aae0..0200c0c3 100644 --- a/appuio/redis/README.md +++ b/appuio/redis/README.md @@ -1,6 +1,6 @@ # redis -![Version: 1.0.0](https://img.shields.io/badge/Version-1.0.0-informational?style=flat-square) ![AppVersion: 6.2.1](https://img.shields.io/badge/AppVersion-6.2.1-informational?style=flat-square) +![Version: 1.1.0](https://img.shields.io/badge/Version-1.1.0-informational?style=flat-square) ![AppVersion: 6.2.1](https://img.shields.io/badge/AppVersion-6.2.1-informational?style=flat-square) Open source, advanced key-value store. It is often referred to as a data structure server since keys can contain strings, hashes, lists, sets and sorted sets. diff --git a/appuio/redis/hack/redis-failover-scripts/Readme.md b/appuio/redis/hack/redis-failover-scripts/Readme.md new file mode 100644 index 00000000..1635c9d8 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/Readme.md @@ -0,0 +1,19 @@ +# Redis Failover Experiments + +Scripts and random notes for redis sentinel failover experiments. + +## Files + +- `values-sentinel.yaml` Helm values for a simple redis cluster with sentinels enabled. +- `monitor.sh` Reports status changes for all nodes in a cluster. +- `fill_cluster.sh` Fills a redis cluster with random keys. Takes the id of the current master node as the first argument. + - `random_keys.lua` Used for `fill_cluster.sh`. 
+- `deny-redis-traffic-to-node-*-networkpolicy.yaml` Deny network traffic to a single node. + +## Resources + +>> Is it OK to wait until 'master_link_status' becomes 'up', and 'master_sync_in_progress' becomes '0' and 'master_last_io_seconds' becomes >= 0? +> If you have no reason to believe something has gone haywire, this ought to tell you that the initial sync process has completed, yes. +- https://groups.google.com/g/redis-db/c/JPvnyfUWx_Q?pli=1 + +- https://lzone.de/cheat-sheet/Redis%20Sentinel diff --git a/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-0-networkpolicy.yaml b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-0-networkpolicy.yaml new file mode 100644 index 00000000..9fe7574b --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-0-networkpolicy.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: deny-redis-traffic-node-0 +spec: + podSelector: + matchLabels: + release: redis-test-cluster + statefulset.kubernetes.io/pod-name: redis-test-cluster-node-0 + # Exception for test-client + ingress: + - from: + - podSelector: + matchLabels: + role: client + egress: + - to: + - podSelector: + matchLabels: + role: client + + policyTypes: + - Ingress + - Egress diff --git a/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-1-networkpolicy.yaml b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-1-networkpolicy.yaml new file mode 100644 index 00000000..6ae13b99 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-1-networkpolicy.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: deny-redis-traffic-node-1 +spec: + podSelector: + matchLabels: + release: redis-test-cluster + statefulset.kubernetes.io/pod-name: redis-test-cluster-node-1 + # Exception for test-client + ingress: + - from: + - podSelector: + matchLabels: + role: client + 
egress: + - to: + - podSelector: + matchLabels: + role: client + + policyTypes: + - Ingress + - Egress diff --git a/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-2-networkpolicy.yaml b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-2-networkpolicy.yaml new file mode 100644 index 00000000..99c55e18 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/deny-redis-traffic-to-node-2-networkpolicy.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: deny-redis-traffic-node-2 +spec: + podSelector: + matchLabels: + release: redis-test-cluster + statefulset.kubernetes.io/pod-name: redis-test-cluster-node-2 + # Exception for test-client + ingress: + - from: + - podSelector: + matchLabels: + role: client + egress: + - to: + - podSelector: + matchLabels: + role: client + + policyTypes: + - Ingress + - Egress diff --git a/appuio/redis/hack/redis-failover-scripts/fill_cluster.sh b/appuio/redis/hack/redis-failover-scripts/fill_cluster.sh new file mode 100644 index 00000000..a8dd7ab2 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/fill_cluster.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Usage: REDIS_PASSWORD=<password> ./fill_cluster.sh <node-id> +# Copies random_keys.lua (from this script's directory) to redis-test-cluster-node-<node-id> and evaluates it repeatedly, printing dbsize after each run. + +set -eu + +node=redis-test-cluster-node-$1 + +kubectl cp "$(dirname "$0")/random_keys.lua" "${node}:/tmp/random_keys.lua" -c redis + +for i in {0..1000} +do + kubectl exec "${node}" -it -c redis -- 2>/dev/null redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" --eval /tmp/random_keys.lua + kubectl exec "${node}" -it -c redis -- 2>/dev/null redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" dbsize +done diff --git a/appuio/redis/hack/redis-failover-scripts/monitor.sh b/appuio/redis/hack/redis-failover-scripts/monitor.sh new file mode 100644 index 00000000..7058ed21 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/monitor.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Usage: REDIS_PASSWORD=<password> ./monitor.sh +# Polls /health/node_ready.lua on redis-test-cluster-node-0..2 in an endless loop and prints a message whenever a node's response changes. + +echo Monitoring nodes + +declare -A last_state=() + +while : +do + for i in {0..2} + do + node=redis-test-cluster-node-$i + response=$( + kubectl 
exec "${node}" -c redis -- redis-cli -h localhost -p 6379 --no-auth-warning -a "$REDIS_PASSWORD" --eval /health/node_ready.lua 2>&1 + ) + if [ "$response" != "${last_state[$node]}" ] + then + echo "### $(date +%R:%S): Node $node state changed" + echo "'${last_state[$node]}' -> '$response'" + last_state[$node]=$response + fi + done +done diff --git a/appuio/redis/hack/redis-failover-scripts/random_keys.lua b/appuio/redis/hack/redis-failover-scripts/random_keys.lua new file mode 100644 index 00000000..7ce8df77 --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/random_keys.lua @@ -0,0 +1,18 @@ +local random_string = function(length) + local res = "" + for i = 1, length do + res = res .. string.char(math.random(97, 122)) + end + return res +end + +-- Seeds random +-- https://redis.io/commands/eval#selective-replication-of-commands +redis.replicate_commands() + +for _ = 1, 100000, 1 do + local str = random_string(10) + redis.call("SET", "RAND_"..str.."key", str); +end + +return redis.status_reply("ok") diff --git a/appuio/redis/hack/redis-failover-scripts/values-sentinel.yaml b/appuio/redis/hack/redis-failover-scripts/values-sentinel.yaml new file mode 100644 index 00000000..ef6a9aba --- /dev/null +++ b/appuio/redis/hack/redis-failover-scripts/values-sentinel.yaml @@ -0,0 +1,26 @@ +password: Fbma0DPVG7 +cluster: + slaveCount: 3 +podDisruptionBudget: + enabled: true + minAvailable: "" + maxUnavailable: 1 +slave: + podAnnotations: + restart: Wed Aug 18 15:29:07 CEST 2021 + persistence: + size: 16Gi + readinessProbe: + initialDelaySeconds: 30 +sentinel: + enabled: true + staticID: true + downAfterMilliseconds: 3000 + failoverTimeout: 5000 + resources: + requests: + cpu: 100m + memory: 32Mi + limits: + cpu: 200m + memory: 64Mi diff --git a/appuio/redis/node_ready.lua b/appuio/redis/node_ready.lua new file mode 100644 index 00000000..a452ee41 --- /dev/null +++ b/appuio/redis/node_ready.lua @@ -0,0 +1,42 @@ +-- Readiness check: replies "ready" unless this node is a slave whose master link is down. +local raw_state = redis.call("info", "replication") + +local 
split = function(text, delim) + return text:gmatch("[^"..delim.."]+") +end + +local collect = function(iter) + local elements = {} + for s in iter do table.insert(elements, s); end + return elements +end + +local has_prefix = function(text, prefix) + return text:find(prefix, 1, true) == 1 +end + +local replication_state = {} +for s in split(raw_state, "\r\n") do + (function(s) + if has_prefix(s,"#") then + return + end + + local kv = collect(split(s, ":")) + replication_state[kv[1]] = kv[2] + end)(s) +end + +local isSlave = replication_state["role"] == "slave" +local isMasterLinkDown = replication_state["master_link_status"] == "down" +local isSyncing = replication_state["master_sync_in_progress"] == "1" + +if isSlave and isMasterLinkDown then + if isSyncing then + return redis.error_reply("node is syncing") + else + return redis.error_reply("link to master down") + end +end + +return redis.status_reply("ready") diff --git a/appuio/redis/templates/health-configmap.yaml b/appuio/redis/templates/health-configmap.yaml index 1bb8e74d..76910f68 100644 --- a/appuio/redis/templates/health-configmap.yaml +++ b/appuio/redis/templates/health-configmap.yaml @@ -9,6 +9,39 @@ metadata: heritage: {{ .Release.Service }} release: {{ .Release.Name }} data: + {{- $files := .Files }} + {{- range tuple "node_ready.lua" }} + {{ . }}: |- {{ range $files.Lines . }} + {{ . }}{{ end }} + {{- end }} + extended_readiness_local.sh: |- + #!/bin/bash +{{- if .Values.usePasswordFile }} + password_aux=`cat ${REDIS_PASSWORD_FILE}` + export REDIS_PASSWORD=$password_aux +{{- end }} + export REDISCLI_AUTH="$REDIS_PASSWORD" + response=$( + timeout -s 3 $1 \ + redis-cli \ + -h localhost \ +{{- if .Values.tls.enabled }} + -p $REDIS_TLS_PORT \ + --tls \ + --cacert {{ template "redis.tlsCACert" . }} \ + {{- if .Values.tls.authClients }} + --cert {{ template "redis.tlsCert" . }} \ + --key {{ template "redis.tlsCertKey" . 
}} \ + {{- end }} +{{- else }} + -p $REDIS_PORT \ +{{- end }} + --eval /health/node_ready.lua + ) + if [ "$response" != "ready" ]; then + echo "$response" + exit 1 + fi ping_readiness_local.sh: |- #!/bin/bash {{- if .Values.usePasswordFile }} diff --git a/appuio/redis/templates/redis-node-statefulset.yaml b/appuio/redis/templates/redis-node-statefulset.yaml index 9ede6417..e096ff71 100644 --- a/appuio/redis/templates/redis-node-statefulset.yaml +++ b/appuio/redis/templates/redis-node-statefulset.yaml @@ -164,11 +164,7 @@ spec: command: - sh - -c - {{- if .Values.sentinel.enabled }} - /health/ping_liveness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }} - {{- else }} - - /health/ping_liveness_local_and_master.sh {{ .Values.slave.livenessProbe.timeoutSeconds }} - {{- end }} {{- else if .Values.slave.customLivenessProbe }} livenessProbe: {{- toYaml .Values.slave.customLivenessProbe | nindent 12 }} {{- end }} @@ -183,11 +179,7 @@ spec: command: - sh - -c - {{- if .Values.sentinel.enabled }} - - /health/ping_readiness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }} - {{- else }} - - /health/ping_readiness_local_and_master.sh {{ .Values.slave.livenessProbe.timeoutSeconds }} - {{- end }} + - /health/extended_readiness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }} {{- else if .Values.slave.customReadinessProbe }} readinessProbe: {{- toYaml .Values.slave.customReadinessProbe | nindent 12 }} {{- end }} @@ -214,7 +206,7 @@ spec: - name: redis-certificates mountPath: /opt/bitnami/redis/certs readOnly: true - {{- end }} + {{- end }} {{- if .Values.extraVolumeMounts }} {{- include "common.tplvalues.render" ( dict "value" .Values.extraVolumeMounts "context" $ ) | nindent 12 }} {{- end }}