Skip to content

Commit

Permalink
Merge pull request #335 from appuio/improve-redis-health-checks
Browse files Browse the repository at this point in the history
[redis] Improve readiness check
  • Loading branch information
bastjan authored Aug 27, 2021
2 parents 0b34f6f + fa13604 commit c8c4407
Show file tree
Hide file tree
Showing 14 changed files with 250 additions and 12 deletions.
2 changes: 2 additions & 0 deletions appuio/redis/.helmignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@
.project
.idea/
*.tmproj

hack/
2 changes: 1 addition & 1 deletion appuio/redis/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ name: redis
sources:
- https://github.com/bitnami/bitnami-docker-redis
- http://redis.io/
version: 1.0.0
version: 1.1.0
2 changes: 1 addition & 1 deletion appuio/redis/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# redis

![Version: 1.0.0](https://img.shields.io/badge/Version-1.0.0-informational?style=flat-square) ![AppVersion: 6.2.1](https://img.shields.io/badge/AppVersion-6.2.1-informational?style=flat-square)
![Version: 1.1.0](https://img.shields.io/badge/Version-1.1.0-informational?style=flat-square) ![AppVersion: 6.2.1](https://img.shields.io/badge/AppVersion-6.2.1-informational?style=flat-square)

Open source, advanced key-value store. It is often referred to as a data structure server since keys can contain strings, hashes, lists, sets and sorted sets.

Expand Down
19 changes: 19 additions & 0 deletions appuio/redis/hack/redis-failover-scripts/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Redis Failover Experiments

Scripts and random notes for redis sentinel failover experiments.

## Files

- `values-sentinel.yaml` Helm values for a simple redis cluster with sentinels enabled.
- `monitor.sh` Reports status changes for all nodes in a cluster.
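- `fill_cluster.sh` Fills a redis cluster with random keys. Takes the ordinal of the current master node (the `N` in `redis-test-cluster-node-N`) as the first argument and reads the password from the `REDIS_PASSWORD` environment variable.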
- `random_keys.lua` Lua script evaluated by `fill_cluster.sh`; each run writes 100000 random keys.
- `deny-redis-traffic-to-node-*-networkpolicy.yaml` Deny network traffic to a single node.

## Resources

>> Is it OK to wait until 'master_link_status' becomes 'up', and 'master_sync_in_progress' becomes '0' and 'master_last_io_seconds' becomes >= 0?
> If you have no reason to believe something has gone haywire, this ought to tell you that the initial sync process has completed, yes.
- https://groups.google.com/g/redis-db/c/JPvnyfUWx_Q?pli=1

- https://lzone.de/cheat-sheet/Redis%20Sentinel
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Failover-experiment helper: denies network traffic to the single pod
# redis-test-cluster-node-0 of release redis-test-cluster.
# Both Ingress and Egress policy types are declared; the only peers listed are
# pods labelled `role: client`, so the test client keeps access to the node.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: deny-redis-traffic-node-0
spec:
podSelector:
matchLabels:
release: redis-test-cluster
statefulset.kubernetes.io/pod-name: redis-test-cluster-node-0
# Exception for test-client
ingress:
- from:
- podSelector:
matchLabels:
role: client
egress:
- to:
- podSelector:
matchLabels:
role: client

policyTypes:
- Ingress
- Egress
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Failover-experiment helper: denies network traffic to the single pod
# redis-test-cluster-node-1 of release redis-test-cluster.
# Both Ingress and Egress policy types are declared; the only peers listed are
# pods labelled `role: client`, so the test client keeps access to the node.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: deny-redis-traffic-node-1
spec:
podSelector:
matchLabels:
release: redis-test-cluster
statefulset.kubernetes.io/pod-name: redis-test-cluster-node-1
# Exception for test-client
ingress:
- from:
- podSelector:
matchLabels:
role: client
egress:
- to:
- podSelector:
matchLabels:
role: client

policyTypes:
- Ingress
- Egress
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Failover-experiment helper: denies network traffic to the single pod
# redis-test-cluster-node-2 of release redis-test-cluster.
# Both Ingress and Egress policy types are declared; the only peers listed are
# pods labelled `role: client`, so the test client keeps access to the node.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: deny-redis-traffic-node-2
spec:
podSelector:
matchLabels:
release: redis-test-cluster
statefulset.kubernetes.io/pod-name: redis-test-cluster-node-2
# Exception for test-client
ingress:
- from:
- podSelector:
matchLabels:
role: client
egress:
- to:
- podSelector:
matchLabels:
role: client

policyTypes:
- Ingress
- Egress
13 changes: 13 additions & 0 deletions appuio/redis/hack/redis-failover-scripts/fill_cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#
# Fills a redis test cluster with random keys.
#
# Usage: REDIS_PASSWORD=<password> fill_cluster.sh <node-ordinal>
#   <node-ordinal>  N in redis-test-cluster-node-N (the current master node)
#
# Copies random_keys.lua into the redis container of the chosen pod, then
# evaluates it repeatedly, printing `dbsize` after every run.

set -eu

# Fail before touching the cluster when a required input is missing.
: "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
node="redis-test-cluster-node-${1:?usage: $0 <node-ordinal>}"

# NOTE(review): the source path is relative to the current working directory;
# confirm where scratchspace/ is expected to live.
kubectl cp scratchspace/random_keys.lua "${node}:/tmp/random_keys.lua" -c redis

for i in {0..1000}
do
  # Quote the password so special characters survive word splitting/globbing.
  # stderr of kubectl is discarded to keep the output to the script replies.
  kubectl exec "${node}" -it -c redis -- redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" --eval /tmp/random_keys.lua 2>/dev/null
  kubectl exec "${node}" -it -c redis -- redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" dbsize 2>/dev/null
done
22 changes: 22 additions & 0 deletions appuio/redis/hack/redis-failover-scripts/monitor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Reports state changes of the test-cluster nodes.
#
# Usage: REDIS_PASSWORD=<password> monitor.sh
#
# Loops forever over redis-test-cluster-node-0..2, evaluates
# /health/node_ready.lua in each pod's redis container and prints a line
# whenever the reply differs from the one last seen for that node.

# Without a password the unquoted `-a` used to swallow `--eval` as its value;
# fail fast instead of reporting a misleading state.
: "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"

echo Monitoring nodes

# node name -> last reply seen
declare -A last_state=()

while :
do
  for i in {0..2}
  do
    node="redis-test-cluster-node-$i"
    # 2>&1: kubectl/redis-cli errors are recorded as the node's state too.
    response=$(
      kubectl exec "${node}" -c redis -- redis-cli -h localhost -p 6379 --no-auth-warning -a "$REDIS_PASSWORD" --eval /health/node_ready.lua 2>&1
    )
    if [ "$response" != "${last_state[$node]:-}" ]
    then
      echo "### $(date +%R:%S): Node $node state changed"
      echo "'${last_state[$node]:-}' -> '$response'"
      last_state[$node]=$response
    fi
  done
done
18 changes: 18 additions & 0 deletions appuio/redis/hack/redis-failover-scripts/random_keys.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
--- Build a string of random lowercase ASCII letters.
-- One math.random draw is made per character, in order.
-- @param length number of characters to generate
-- @return the generated string ("" when length is less than 1)
local random_string = function(length)
  local letters = {}
  for pos = 1, length do
    -- 97..122 are the byte values of 'a'..'z'
    letters[pos] = string.char(math.random(97, 122))
  end
  return table.concat(letters)
end

-- Seeds random (original author's note; see the linked section on
-- replicating commands instead of the script itself).
-- https://redis.io/commands/eval#selective-replication-of-commands
redis.replicate_commands()

-- Write 100000 keys named RAND_<10 random letters>key, each storing its own
-- random part as the value.
local remaining = 100000
while remaining > 0 do
  local suffix = random_string(10)
  redis.call("SET", "RAND_" .. suffix .. "key", suffix)
  remaining = remaining - 1
end

return redis.status_reply("ok")
26 changes: 26 additions & 0 deletions appuio/redis/hack/redis-failover-scripts/values-sentinel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Helm values for a simple redis test cluster with sentinels enabled
# (used by the failover experiments).
# NOTE(review): hard-coded password, presumably a throwaway test credential;
# confirm it is not reused anywhere.
password: Fbma0DPVG7
cluster:
slaveCount: 3
podDisruptionBudget:
enabled: true
minAvailable: ""
maxUnavailable: 1
slave:
podAnnotations:
# NOTE(review): looks like a timestamp changed by hand to force a pod restart; confirm.
restart: Wed Aug 18 15:29:07 CEST 2021
persistence:
size: 16Gi
readinessProbe:
initialDelaySeconds: 30
sentinel:
enabled: true
staticID: true
downAfterMilliseconds: 3000
failoverTimeout: 5000
resources:
requests:
cpu: 100m
memory: 32Mi
limits:
cpu: 200m
memory: 64Mi
41 changes: 41 additions & 0 deletions appuio/redis/node_ready.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Raw text of the "replication" section of INFO; parsed into key/value pairs below.
local raw_state = redis.call("info", "replication")

--- Iterate over the non-empty pieces of `text` between delimiter characters.
-- `delim` is a set of single-character delimiters, not a multi-character
-- separator: "\r\n" splits on either "\r" or "\n".
-- Every non-alphanumeric delimiter character is escaped with "%" so that
-- pattern-magic characters ("%", "]", "^", "-") are matched literally instead
-- of corrupting the character class.
-- @param text  string to split
-- @param delim string whose characters act as delimiters
-- @return iterator function yielding each piece in order
local split = function(text, delim)
  -- Assigning to a single local drops gsub's second result (the match count).
  local escaped = delim:gsub("%W", "%%%0")
  return text:gmatch("[^" .. escaped .. "]+")
end

--- Drain an iterator into an array.
-- Only the first value of each iteration step is kept.
-- @param iter iterator function, called until it returns nil
-- @return array of the yielded values, in iteration order
local collect = function(iter)
  local items, count = {}, 0
  for value in iter do
    count = count + 1
    items[count] = value
  end
  return items
end

--- Tell whether `text` begins with `prefix`.
-- The search is a plain (non-pattern) find, so magic characters in `prefix`
-- are taken literally.
-- @param text   string to inspect
-- @param prefix literal string expected at position 1
-- @return true when the first occurrence of `prefix` is at position 1
local has_prefix = function(text, prefix)
  local match_start = text:find(prefix, 1, true)
  return match_start == 1
end

-- Turn the INFO text into a key -> value table.
-- Each line is cut at ":"; the first piece is the key, the second the value.
local replication_state = {}
for line in split(raw_state, "\r\n") do
  -- Lines starting with "#" carry no key/value pair and are skipped.
  if not has_prefix(line, "#") then
    local fields = collect(split(line, ":"))
    replication_state[fields[1]] = fields[2]
  end
end

-- Readiness verdict: only a slave whose link to the master is down is
-- reported as not ready; the error text tells a running sync apart from a
-- plain link failure. Everything else replies "ready".
local is_slave = replication_state["role"] == "slave"
local link_down = replication_state["master_link_status"] == "down"
local sync_running = replication_state["master_sync_in_progress"] == "1"

if is_slave and link_down then
  if sync_running then
    return redis.error_reply("node is syncing")
  end
  return redis.error_reply("link to master down")
end

return redis.status_reply("ready")
33 changes: 33 additions & 0 deletions appuio/redis/templates/health-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,39 @@ metadata:
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
data:
{{- $files := .Files }}
{{- range tuple "node_ready.lua" }}
{{ . }}: |- {{ range $files.Lines . }}
{{ . }}{{ end }}
{{- end }}
extended_readiness_local.sh: |-
#!/bin/bash
{{- if .Values.usePasswordFile }}
password_aux=`cat ${REDIS_PASSWORD_FILE}`
export REDIS_PASSWORD=$password_aux
{{- end }}
export REDISCLI_AUTH="$REDIS_PASSWORD"
response=$(
timeout -s 3 $1 \
redis-cli \
-h localhost \
{{- if .Values.tls.enabled }}
-p $REDIS_TLS_PORT \
--tls \
--cacert {{ template "redis.tlsCACert" . }} \
{{- if .Values.tls.authClients }}
--cert {{ template "redis.tlsCert" . }} \
--key {{ template "redis.tlsCertKey" . }} \
{{- end }}
{{- else }}
-p $REDIS_PORT \
{{- end }}
--eval /health/node_ready.lua
)
if [ "$response" != "ready" ]; then
echo "$response"
exit 1
fi
ping_readiness_local.sh: |-
#!/bin/bash
{{- if .Values.usePasswordFile }}
Expand Down
12 changes: 2 additions & 10 deletions appuio/redis/templates/redis-node-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,7 @@ spec:
command:
- sh
- -c
{{- if .Values.sentinel.enabled }}
- /health/ping_liveness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }}
{{- else }}
- /health/ping_liveness_local_and_master.sh {{ .Values.slave.livenessProbe.timeoutSeconds }}
{{- end }}
{{- else if .Values.slave.customLivenessProbe }}
livenessProbe: {{- toYaml .Values.slave.customLivenessProbe | nindent 12 }}
{{- end }}
Expand All @@ -183,11 +179,7 @@ spec:
command:
- sh
- -c
{{- if .Values.sentinel.enabled }}
- /health/ping_readiness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }}
{{- else }}
- /health/ping_readiness_local_and_master.sh {{ .Values.slave.livenessProbe.timeoutSeconds }}
{{- end }}
- /health/extended_readiness_local.sh {{ .Values.slave.livenessProbe.timeoutSeconds }}
{{- else if .Values.slave.customReadinessProbe }}
readinessProbe: {{- toYaml .Values.slave.customReadinessProbe | nindent 12 }}
{{- end }}
Expand All @@ -214,7 +206,7 @@ spec:
- name: redis-certificates
mountPath: /opt/bitnami/redis/certs
readOnly: true
{{- end }}
{{- end }}
{{- if .Values.extraVolumeMounts }}
{{- include "common.tplvalues.render" ( dict "value" .Values.extraVolumeMounts "context" $ ) | nindent 12 }}
{{- end }}
Expand Down

0 comments on commit c8c4407

Please sign in to comment.