diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c299609..6b5bff1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,16 +1,20 @@ +--- name: Test on: push: branches: - master - - 'release/**' - pull_request: + - main + - release/** + pull_request: null + workflow_dispatch: + jobs: golangci-lint: runs-on: ubuntu-22.04 timeout-minutes: 20 steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 with: fetch-depth: 1 - uses: actions/setup-go@v3 @@ -18,22 +22,194 @@ jobs: go-version: 1.19.x - run: sudo apt-get update && sudo apt-get install -y libseccomp-dev - name: golangci-lint - uses: golangci/golangci-lint-action@v3.2.0 + uses: golangci/golangci-lint-action@v3.7.0 with: - version: v1.49.0 + version: v1.55.2 args: --verbose - ubuntu-2204: - name: "Ubuntu 22.04" - # nested virtualization is only available on macOS hosts - runs-on: macos-12 - timeout-minutes: 30 + + create-lxc-image: + name: create-lxc-image + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4.1.1 + + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + lookup-only: true + + - name: setup lxd + id: s1 + if: steps.cache-restore.outputs.cache-hit != 'true' + run: ./test/setup_lxd.sh + + - name: launch lxc container + id: s2 + if: steps.s1.conclusion == 'success' + run: ./test/launch_test_lxc.sh + + - name: install dependencies and build + id: s3 + if: steps.s2.conclusion == 'success' + run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh + + - name: export image + id: s4 + if: steps.s3.conclusion == 'success' + run: ./test/export_lxc_image.sh test + + - uses: actions/cache/save@v3 + id: s5 + if: steps.s4.conclusion == 'success' + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + + test: + runs-on: ubuntu-22.04 + needs: create-lxc-image + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4.1.1 + - name: setup lxd + run: ./test/setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./test/launch_test_lxc.sh test-export + - name: run test + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh SYNC" + # some source codes may be updated. re-export new image. + - name: export image + run: sudo lxc image alias delete test-export && rm -f /tmp/test-image.tar.zst && ./test/export_lxc_image.sh test + - uses: actions/cache/save@v3 + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + #- name: debug + # run: ./debug.sh + + benchmark: + runs-on: ubuntu-22.04 + needs: test + timeout-minutes: 20 + strategy: + matrix: + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.1 + - name: setup lxd + run: ./test/setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./test/launch_test_lxc.sh test-export + - name: run benchmark (${{ matrix.script }}) + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" + - name: upload plot + id: get_plot + if: matrix.script != 'iperf3/iperf3_host' + run: | + mkdir /tmp/benchmark-results + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-rootful-direct.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-rootful-host.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-host.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-w-b4ns.log /tmp/benchmark-results/. + - uses: actions/upload-artifact@v3 + if: steps.get_plot.conclusion == 'success' + with: + name: benchmark-results + path: /tmp/benchmark-results + - - name: "Cache ~/.vagrant.d/boxes" - uses: actions/cache@v3 + benchmark-multinode: + runs-on: ubuntu-22.04 + needs: test + timeout-minutes: 20 + strategy: + matrix: + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] + steps: + - uses: actions/checkout@v4.1.1 + - name: setup lxd + run: ./test/setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./test/launch_test_lxc.sh test-export + - name: run benchmark (${{ matrix.script }}) + run: ./benchmark/${{ matrix.script }}_multinode.sh + - name: upload plot + run: | + mkdir /tmp/benchmark-results + cp benchmark/${{ matrix.script }}-multinode-rootful.log /tmp/benchmark-results/. + cp benchmark/${{ matrix.script }}-multinode-wo-b4ns.log /tmp/benchmark-results/. + cp benchmark/${{ matrix.script }}-multinode-w-b4ns.log /tmp/benchmark-results/. + - uses: actions/upload-artifact@v3 + with: + name: benchmark-results + path: /tmp/benchmark-results + + plot: + runs-on: ubuntu-22.04 + needs: [benchmark, benchmark-multinode] + steps: + - uses: actions/checkout@v4.1.1 + - run: sudo apt update && sudo apt install python3 python3-pip + - run: pip3 install matplotlib numpy + - uses: actions/download-artifact@v3 + with: + name: benchmark-results + path: ./ + - run: mkdir /tmp/benchmark-plots + - run: python3 benchmark/redis/redis_plot.py redis-rootful-direct.log redis-rootful-host.log redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-w-b4ns.log /tmp/benchmark-plots/redis.png + - run: python3 benchmark/redis/redis_plot.py redis-multinode-rootful.log redis-multinode-wo-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis-multinode.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-rootful-direct.log iperf3-rootful-host.log iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-w-b4ns.log /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-multinode-rootful.log iperf3-multinode-wo-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3-multinode.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-rootful-direct.log postgres-rootful-host.log postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-w-b4ns.log /tmp/benchmark-plots/postgres.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-multinode-rootful.log postgres-multinode-wo-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres-multinode.png + - run: python3 benchmark/block/block_plot.py block-rootful-direct.log block-rootful-host.log block-wo-b4ns-direct.log block-wo-b4ns-host.log block-w-b4ns.log /tmp/benchmark-plots/block.png + - run: python3 benchmark/block/block_plot.py block-multinode-rootful.log block-multinode-wo-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block-multinode.png + - run: python3 benchmark/memcached/memcached_plot.py memcached-rootful-direct.log memcached-rootful-host.log memcached-wo-b4ns-direct.log memcached-wo-b4ns-host.log memcached-w-b4ns.log /tmp/benchmark-plots/memcached.png + - run: python3 benchmark/memcached/memcached_plot.py memcached-multinode-rootful.log memcached-multinode-wo-b4ns.log memcached-multinode-w-b4ns.log /tmp/benchmark-plots/memcached-multinode.png + - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-rootful-direct.log rabbitmq-rootful-host.log rabbitmq-wo-b4ns-direct.log rabbitmq-wo-b4ns-host.log rabbitmq-w-b4ns.log /tmp/benchmark-plots/rabbitmq.png + - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-multinode-rootful.log rabbitmq-multinode-wo-b4ns.log rabbitmq-multinode-w-b4ns.log /tmp/benchmark-plots/rabbitmq-multinode.png + - run: python3 benchmark/etcd/etcd_plot.py etcd-rootful-direct.log etcd-rootful-host.log etcd-wo-b4ns-direct.log etcd-wo-b4ns-host.log etcd-w-b4ns.log /tmp/benchmark-plots/etcd.png + - run: python3 benchmark/etcd/etcd_plot.py etcd-multinode-rootful.log etcd-multinode-wo-b4ns.log etcd-multinode-w-b4ns.log /tmp/benchmark-plots/etcd-multinode.png + - run: python3 benchmark/mysql/mysql_plot.py mysql-rootful-direct.log mysql-rootful-host.log mysql-wo-b4ns-direct.log mysql-wo-b4ns-host.log mysql-w-b4ns.log /tmp/benchmark-plots/mysql.png + - run: python3 benchmark/mysql/mysql_plot.py mysql-multinode-rootful.log mysql-multinode-wo-b4ns.log mysql-multinode-w-b4ns.log /tmp/benchmark-plots/mysql-multinode.png + - uses: actions/upload-artifact@v3 with: - path: ~/.vagrant.d/boxes - key: vagrant-${{ hashFiles('Vagrantfile*') }} + name: benchmark-plots + path: /tmp/benchmark-plots - - run: vagrant up + bench-script: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4.1.1 + - run: sudo ip a add 192.168.6.2/32 dev eth0 + - run: hostname -I + - run: ./test/init_test.sh + - run: ~/bypass4netns/benchmark/run_bench.sh diff --git a/Vagrantfile b/Vagrantfile deleted file mode 100644 index b940cd2..0000000 --- a/Vagrantfile +++ /dev/null @@ -1,122 +0,0 @@ -# -*- mode: ruby -*- -# vi: set ft=ruby : - -Vagrant.configure("2") do |config| - config.vm.box = "ubuntu/jammy64" - memory = 4096 - cpus = 2 - config.vm.provider :virtualbox do |v| - v.memory = memory - v.cpus = cpus - # Avoid 10.0.0.0/8 and 172.0.0.0/8: https://github.com/rootless-containers/bypass4netns/pull/5#issuecomment-1026602768 - v.customize ["modifyvm", :id, "--natnet1", "192.168.6.0/24"] - end - config.vm.provider :libvirt do |v| - v.memory = memory - v.cpus = cpus - end - config.vm.provision "shell", privileged: false, inline: <<~SHELL - #!/bin/bash - set -eu -o pipefail - - NERDCTL_VERSION="0.22.2" - ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" - echo "===== Prepare =====" - ( - set -x - sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap golang python3 - systemctl --user start dbus - - curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxzv /usr/local - containerd-rootless-setuptool.sh install - containerd-rootless-setuptool.sh install-buildkit - nerdctl info - nerdctl pull --quiet "${ALPINE_IMAGE}" - - cd /vagrant - make - sudo rm -f /usr/local/bin/bypass4netns* - sudo make install - - hostname -I | awk '{print $1}' | tee /tmp/host_ip - /vagrant/test/seccomp.json.sh | tee /tmp/seccomp.json - - systemd-run --user --unit run-iperf3 iperf3 -s - ) - - echo "===== `--ignore` option test =====" - ( - set -x - systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug - nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c $(cat /tmp/host_ip) - # TODO: this check is dirty. we want better method to check the connect(2) is ignored. - journalctl --user -u run-bypass4netns.service | grep "is ignored, skipping." - nerdctl rm -f test - systemctl --user stop run-bypass4netns.service - - ) - - echo "===== connect(2),sendto(2) test =====" - ( - systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 - set -x - cd /vagrant/test - /bin/bash test.sh /tmp/seccomp.json $(cat /tmp/host_ip) - systemctl --user stop run-bypass4netns.service - ) - - echo "===== Test bypass4netnsd =====" - ( - set -x - /vagrant/test/test_b4nnd.sh - ) - - echo "===== Benchmark: netns -> host With bypass4netns =====" - ( - set -x - - # start bypass4netnsd for nerdctl integration - systemd-run --user --unit run-bypass4netnsd bypass4netnsd - sleep 1 - nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test - ) - - echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" - ( - set -x - nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test - ) - - echo "===== Benchmark: host -> netns With bypass4netns =====" - ( - set -x - nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 - sleep 1 # waiting `iperf3 -s -4` becomes ready - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test - ) - - echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" - ( - set -x - nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 - sleep 1 - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test - ) - - SHELL -end diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..676904e --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,4 @@ +*.csv +*.png +*.json +*.log \ No newline at end of file diff --git a/benchmark/block/.gitignore b/benchmark/block/.gitignore new file mode 100644 index 0000000..1d04dbb --- /dev/null +++ b/benchmark/block/.gitignore @@ -0,0 +1,2 @@ +blk-* +bench \ No newline at end of file diff --git a/benchmark/block/Dockerfile b/benchmark/block/Dockerfile new file mode 100644 index 0000000..728e4fa --- /dev/null +++ b/benchmark/block/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.21.3 as bench-builder + +COPY bench.go . +# static link +RUN CGO_ENABLED=0 go build -o /bench bench.go + +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y wget multitime nginx +COPY --from=bench-builder /bench /bench + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/benchmark/block/bench.go b/benchmark/block/bench.go new file mode 100644 index 0000000..3a2aa15 --- /dev/null +++ b/benchmark/block/bench.go @@ -0,0 +1,103 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +var ( + url = flag.String("url", "http://localhost/blk-1m", "") + threadNum = flag.Int("thread-num", 1, "") + count = flag.Int("count", 1, "") +) + +type BenchmarkResult struct { + Url string `json:"url"` + Count int `json:"count"` + TotalElapsedSecond float64 `json:"totalElapsedSecond"` + TotalSize int64 `json:"totalSize"` +} + +func main() { + flag.Parse() + + //fmt.Printf("url = %s\n", *url) + //fmt.Printf("thread-num = %d\n", *threadNum) + //fmt.Printf("count = %d\n", *count) + + resultsChan := make(chan BenchmarkResult, *count) + + for i := 0; i < *threadNum; i++ { + go bench(*url, *count, resultsChan) + } + + results := []BenchmarkResult{} + for i := 0; i < *threadNum; i++ { + r := <-resultsChan + results = append(results, r) + } + + res, err := json.Marshal(results) + if err != nil { + fmt.Printf("failed Marshal err=%q", err) + panic("error") + } + fmt.Fprintln(os.Stdout, string(res)) +} + +func bench(url string, count int, resultChan chan BenchmarkResult) { + bufferSize := 1024 * 1024 * 128 // 128 MiB + buffer := make([]byte, bufferSize) + result := BenchmarkResult{ + Url: url, + Count: count, + TotalElapsedSecond: 0, + TotalSize: 0, + } + + for i := 0; i < count; i++ { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + fmt.Printf("failed NewRequest err=%q", err) + panic("error") + } + for { + start := time.Now() + resp, err := http.DefaultClient.Do(req) + if err != nil { + fmt.Printf("failed Do err=%q, retrying... %d/%d", err, i, count) + time.Sleep(100 * time.Millisecond) + continue + } + + if resp.StatusCode != 200 { + fmt.Printf("unexpected status code %d", resp.StatusCode) + panic("error") + } else { + for { + readSize, err := resp.Body.Read(buffer) + if err != nil && err != io.EOF { + fmt.Printf("failed Copy() err=%q", err) + panic("error") + } + if readSize == 0 { + end := time.Now() + elapsed := end.Sub(start).Seconds() + result.TotalElapsedSecond += elapsed + break + } + result.TotalSize += int64(readSize) + } + } + resp.Body.Close() + break + } + } + + resultChan <- result +} diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh new file mode 100755 index 0000000..b161adb --- /dev/null +++ b/benchmark/block/block.sh @@ -0,0 +1,174 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + +IMAGE_NAME="block" +COUNT="10" +THREAD_NUM="1" + +source ~/.profile +. ../param.bash + +./gen_blocks.sh + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit + +sudo nerdctl build -f ./Dockerfile -t $IMAGE_NAME . +nerdctl build -f ./Dockerfile -t $IMAGE_NAME . + +BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') + +echo "===== Benchmark: block rooful via NetNS =====" +( + set +e + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client + set -ex + + sudo nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 + SERVER_IP=$(sudo nerdctl exec block-server hostname -i) + LOG_NAME="block-rootful-direct.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + sudo nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + sudo nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + done + + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client +) + +echo "===== Benchmark: block rootful via host =====" +( + set +e + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client + set -ex + + sudo nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 + LOG_NAME="block-rootful-host.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + sudo nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + sudo nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + set -ex + + nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 + SERVER_IP=$(nerdctl exec block-server hostname -i) + LOG_NAME="block-wo-b4ns-direct.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + set -ex + + nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 + LOG_NAME="block-wo-b4ns-host.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity + LOG_NAME="block-w-b4ns.log" + sleep 5 + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity + SERVER_IP=$(nerdctl exec block-server hostname -i) + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE + done + + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) + diff --git a/benchmark/block/block_multinode.sh b/benchmark/block/block_multinode.sh new file mode 100755 index 0000000..b223c77 --- /dev/null +++ b/benchmark/block/block_multinode.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f block-server +NAME="test" exec_lxc nerdctl rm -f block-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +IMAGE_NAME="block" +COUNT="10" +BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') + +set -eux -o pipefail + +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/block && sudo nerdctl build -f ./Dockerfile -t $IMAGE_NAME ." +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/block && nerdctl build -f ./Dockerfile -t $IMAGE_NAME ." + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +NAME="test" exec_lxc /home/ubuntu/bypass4netns/benchmark/block/gen_blocks.sh + +echo "===== Benchmark: block rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name block-client $IMAGE_NAME sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + LOG_NAME="block-multinode-rootful.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc /bin/bash -c "sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$TEST1_VXLAN_ADDR/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc sudo nerdctl rm -f block-server + NAME="test2" exec_lxc sudo nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name block-client $IMAGE_NAME sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + LOG_NAME="block-multinode-wo-b4ns.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc /bin/bash -c "nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$TEST1_VXLAN_ADDR/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc nerdctl rm -f block-server + NAME="test2" exec_lxc nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 8080:80 -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec block-server hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity" + LOG_NAME="block-multinode-w-b4ns.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc /bin/bash -c "nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc nerdctl rm -f block-server + NAME="test2" exec_lxc nerdctl rm -f block-client +) diff --git a/benchmark/block/block_plot.py b/benchmark/block/block_plot.py new file mode 100644 index 0000000..a8598cf --- /dev/null +++ b/benchmark/block/block_plot.py @@ -0,0 +1,41 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.25 + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + for l in json.loads(line): + gbps = l["totalSize"] * 8 / l["totalElapsedSecond"] / 1024 / 1024 / 1024 + file = l["url"].split("/")[3] + if file not in data: + data[file] = 0.0 + data[file] += gbps + line = f.readline() + return data + +labels=['blk-1k', 'blk-32k', 'blk-512k', 'blk-1m', 'blk-32m', 'blk-128m', 'blk-512m', 'blk-1g'] + +plt.ylabel("Gbps") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + value = [] + for l in labels: + value.append(data[l]) + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +plt.legend() +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/block/gen_blocks.sh b/benchmark/block/gen_blocks.sh new file mode 100755 index 0000000..9aa0d7b --- /dev/null +++ b/benchmark/block/gen_blocks.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +cd $(dirname $0) + +BLOCK_SIZES=(1 32 128 512) + +for BLOCK_SIZE in ${BLOCK_SIZES[@]} +do + dd if=/dev/urandom of=blk-${BLOCK_SIZE}k bs=1024 count=$BLOCK_SIZE +done + +for BLOCK_SIZE in ${BLOCK_SIZES[@]} +do + dd if=/dev/urandom of=blk-${BLOCK_SIZE}m bs=1048576 count=$BLOCK_SIZE +done + +dd if=/dev/urandom of=blk-1g bs=1048576 count=1024 + diff --git a/benchmark/etcd/Dockerfile b/benchmark/etcd/Dockerfile new file mode 100644 index 0000000..d2a21e4 --- /dev/null +++ b/benchmark/etcd/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.21.3 as bench-builder + +RUN mkdir /etcd +RUN curl -fsSL https://github.com/etcd-io/etcd/archive/refs/tags/v3.5.10.tar.gz | tar -xz --no-same-owner --no-same-permissions --strip-components 1 -C /etcd +RUN ls -l /etcd + +# static link +RUN cd /etcd/tools/benchmark && CGO_ENABLED=0 go build -o /bench main.go + +FROM ubuntu:22.04 + +COPY --from=bench-builder /bench /bench + +CMD ["/bin/bash", "-c", "sleep infinity"] diff --git a/benchmark/etcd/etcd.sh b/benchmark/etcd/etcd.sh new file mode 100755 index 0000000..4a43fe6 --- /dev/null +++ b/benchmark/etcd/etcd.sh @@ -0,0 +1,150 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + + +ETCD_VERSION="v3.3.25" +ETCD_IMAGE="quay.io/coreos/etcd:${ETCD_VERSION}" +BENCH_IMAGE="etcd-bench" + +source ~/.profile +. ../param.bash + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit +sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . +nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . + +sudo nerdctl pull --quiet $ETCD_IMAGE +nerdctl pull --quiet $ETCD_IMAGE + +echo "===== Benchmark: etcd rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed + set -ex + + sudo nerdctl run -d --name etcd-server $ETCD_IMAGE /bin/sh -c "sleep infinity" + SERVER_IP=$(sudo nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server sudo nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + sleep 5 + LOG_NAME="etcd-rootful-direct.log" + sudo nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd rootful via host =====" +( + set +e + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + set -ex + + sudo nerdctl run -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-rootful-host.log" + sudo nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed + set -ex + + nerdctl run -d --name etcd-server $ETCD_IMAGE /bin/sh -c "sleep infinity" + SERVER_IP=$(nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + sleep 5 + LOG_NAME="etcd-wo-b4ns-direct.log" + nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + set -ex + + nerdctl run -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-wo-b4ns-host.log" + nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /bin/sh -c "sleep infinity" + sleep 5 + SERVER_IP=$(nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + nerdctl run --label nerdctl/bypass4netns=true --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/etcd/etcd_multinode.sh b/benchmark/etcd/etcd_multinode.sh new file mode 100755 index 0000000..3370c0c --- /dev/null +++ b/benchmark/etcd/etcd_multinode.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f etcd-server +NAME="test" exec_lxc nerdctl rm -f etcd-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ETCD_VERSION="v3.3.25" +ETCD_IMAGE="quay.io/coreos/etcd:${ETCD_VERSION}" +BENCH_IMAGE="etcd-bench" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $ETCD_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $ETCD_IMAGE +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/etcd && sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/etcd && nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: etcd rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc systemd-run --user --unit etcd-server sudo nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$TEST1_VXLAN_ADDR:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="etcd-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $TEST1_VXLAN_ADDR:2379 > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f etcd-server + NAME="test" exec_lxc systemctl --user stop etcd-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc sudo nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$TEST1_VXLAN_ADDR:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="etcd-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $TEST1_VXLAN_ADDR:2379 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f etcd-server + NAME="test" exec_lxc systemctl --user stop etcd-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 12379:2379 --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec etcd-server hostname -i) + NAME="test" exec_lxc systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + sleep 5 + LOG_NAME="etcd-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f etcd-server + NAME="test2" exec_lxc nerdctl rm -f etcd-client +) \ No newline at end of file diff --git a/benchmark/etcd/etcd_plot.py b/benchmark/etcd/etcd_plot.py new file mode 100644 index 0000000..2e8a399 --- /dev/null +++ b/benchmark/etcd/etcd_plot.py @@ -0,0 +1,53 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip().replace("\t", " ") + if "Requests/sec" in line: + data["Requests/sec"] = float(line.split(" ")[1]) + if "Average" in line: + data["Latency(ms)"] = float(line.split(" ")[1]) * 1000 + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['Requests/sec', 'Latency(ms)'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Requests / second") +ax2 = ax1.twinx() +ax2.set_ylabel("Average latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i], datas[i][labels[0]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor+BAR_WIDTH*i], datas[i][labels[1]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/iperf3/iperf3.sh b/benchmark/iperf3/iperf3.sh new file mode 100755 index 0000000..5a7394f --- /dev/null +++ b/benchmark/iperf3/iperf3.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +set -eu -o pipefail + +cd $(dirname $0) + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +source ~/.profile +. ../param.bash + +sudo nerdctl pull --quiet $ALPINE_IMAGE +nerdctl pull --quiet $ALPINE_IMAGE + +echo "===== Benchmark: iperf3 rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + set -ex + + sudo nerdctl run -d --name iperf3-server $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + sudo nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(sudo nerdctl exec iperf3-server hostname -i) + sleep 1 + sudo nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-rootful-direct.log + + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 rootful via host =====" +( + set +e + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed + set -ex + + sudo nerdctl run -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + sudo nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + + sleep 1 + sudo nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-rootful-host.log + + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed + set -ex + + nerdctl run -d --name iperf3-server $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(nerdctl exec iperf3-server hostname -i) + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-direct.log + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed + set -ex + + nerdctl run -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + sleep 1 + nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-host.log + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + sleep 1 + nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.log + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(nerdctl exec iperf3-server hostname -i) + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/iperf3/iperf3_host.sh b/benchmark/iperf3/iperf3_host.sh new file mode 100755 index 0000000..ab2950b --- /dev/null +++ b/benchmark/iperf3/iperf3_host.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +set -eu -o pipefail + +cd $(dirname $0) + +source ~/.profile +. ../param.bash + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" +nerdctl pull --quiet "${ALPINE_IMAGE}" + +systemd-run --user --unit run-iperf3 iperf3 -s + +echo "===== Benchmark: netns -> host With bypass4netns =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + # start bypass4netnsd for nerdctl integration + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $HOST_IP + nerdctl rm -f test +) + +echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" +( + set +e + nerdctl rm -f test + set -ex + + nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $HOST_IP + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns With bypass4netns =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns + systemctl --user reset-failed + set -ex + + nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 + sleep 1 # waiting `iperf3 -s -4` becomes ready + iperf3 -c $HOST_IP -p 8080 + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns2 + systemctl --user reset-failed + set -ex + + nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 + sleep 1 + iperf3 -c $HOST_IP -p 8080 + nerdctl rm -f test +) diff --git a/benchmark/iperf3/iperf3_multinode.sh b/benchmark/iperf3/iperf3_multinode.sh new file mode 100755 index 0000000..00a7f4d --- /dev/null +++ b/benchmark/iperf3/iperf3_multinode.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f iperf3-server +NAME="test" exec_lxc nerdctl rm -f iperf3-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $ALPINE_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $ALPINE_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: iperf3 rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-client $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + NAME="test" exec_lxc systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc sudo nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f iperf3-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc sudo nerdctl rm -f iperf3-client +) + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-client $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 + + NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-wo-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f iperf3-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f iperf3-client +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 5202:5201 --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 + + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec iperf3-server hostname -i) + NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-multinode-w-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f iperf3-server + NAME="test2" exec_lxc nerdctl rm -f iperf3-client +) \ No newline at end of file diff --git a/benchmark/iperf3/iperf3_plot.py b/benchmark/iperf3/iperf3_plot.py new file mode 100644 index 0000000..744bdd4 --- /dev/null +++ b/benchmark/iperf3/iperf3_plot.py @@ -0,0 +1,30 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.4 + +def load_data(filename): + with open(filename) as f: + return json.load(f) + +labels=['sum_received.bits_per_second'] + +#plt.rcParams["figure.figsize"] = (20,4) +plt.ylabel("Gbps") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data_json = load_data(filename) + value = [data_json["end"]["sum_received"]["bits_per_second"] / 1024 / 1024 / 1024] + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +plt.legend() +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/memcached/memcached.sh b/benchmark/memcached/memcached.sh new file mode 100755 index 0000000..e984ff7 --- /dev/null +++ b/benchmark/memcached/memcached.sh @@ -0,0 +1,135 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + +MEMCACHED_VERSION=1.6.22 +MEMCACHED_IMAGE="memcached:${MEMCACHED_VERSION}" + +MEMTIRE_VERSION=2.0.0 +MEMTIRE_IMAGE="redislabs/memtier_benchmark:${MEMTIRE_VERSION}" + +source ~/.profile +. ../param.bash + +sudo nerdctl pull --quiet $MEMCACHED_IMAGE +sudo nerdctl pull --quiet $MEMTIRE_IMAGE +nerdctl pull --quiet $MEMCACHED_IMAGE +nerdctl pull --quiet $MEMTIRE_IMAGE + +echo "===== Benchmark: memcached rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client + set -ex + + sudo nerdctl run -d --name memcached-server $MEMCACHED_IMAGE + SERVER_IP=$(sudo nerdctl exec memcached-server hostname -i) + LOG_NAME="memcached-rootful-direct.log" + sudo nerdctl run --name memcached-client $MEMTIRE_IMAGE --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + sudo nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached rootful via host =====" +( + set +e + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client + set -ex + + sudo nerdctl run -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-rootful-host.log" + sudo nerdctl run --name memcached-client $MEMTIRE_IMAGE --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + sudo nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + set -ex + + nerdctl run -d --name memcached-server $MEMCACHED_IMAGE + SERVER_IP=$(nerdctl exec memcached-server hostname -i) + LOG_NAME="memcached-wo-b4ns-direct.log" + nerdctl run -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + set -ex + + nerdctl run -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-wo-b4ns-host.log" + nerdctl run -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + SERVER_IP=$(nerdctl exec memcached-server hostname -i) + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/memcached/memcached_multinode.sh b/benchmark/memcached/memcached_multinode.sh new file mode 100755 index 0000000..9f2b1ee --- /dev/null +++ b/benchmark/memcached/memcached_multinode.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f memcached-server +NAME="test" exec_lxc nerdctl rm -f memcached-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +MEMCACHED_VERSION=1.6.22 +MEMCACHED_IMAGE="memcached:${MEMCACHED_VERSION}" +MEMTIRE_VERSION=2.0.0 +MEMTIRE_IMAGE="redislabs/memtier_benchmark:${MEMTIRE_VERSION}" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $MEMCACHED_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MEMCACHED_IMAGE +NAME="test" exec_lxc sudo nerdctl pull --quiet $MEMTIRE_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MEMTIRE_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: memcached rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="memcached-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec memcached-client memtier_benchmark --host=$TEST1_VXLAN_ADDR --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc sudo nerdctl exec memcached-client cat /$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f memcached-server + NAME="test2" exec_lxc sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="memcached-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec memcached-client memtier_benchmark --host=$TEST1_VXLAN_ADDR --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc nerdctl exec memcached-client cat /$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f memcached-server + NAME="test2" exec_lxc nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 11212:11211 --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec memcached-server hostname -i) + sleep 5 + LOG_NAME="memcached-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc nerdctl exec memcached-client cat /$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f memcached-server + NAME="test2" exec_lxc nerdctl rm -f memcached-client +) \ No newline at end of file diff --git a/benchmark/memcached/memcached_plot.py b/benchmark/memcached/memcached_plot.py new file mode 100644 index 0000000..e90ac7e --- /dev/null +++ b/benchmark/memcached/memcached_plot.py @@ -0,0 +1,55 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.4 + +def load_data(filename): + data = {} + with open(filename) as f: + d = json.load(f) + if d == None: + raise Exception("{} has invalid json format".format(filename)) + data["Ops/sec"] = [] + data["Ops/sec"].append(d["ALL STATS"]["Sets"]["Ops/sec"]) + data["Ops/sec"].append(d["ALL STATS"]["Gets"]["Ops/sec"]) + data["Latency"] = [] + data["Latency"].append(d["ALL STATS"]["Sets"]["Latency"]) + data["Latency"].append(d["ALL STATS"]["Gets"]["Latency"]) + return data + +labels=['Sets(Ops/sec)', 'Gets(Ops/sec)', 'Sets(Latency)', 'Gets(Latency)'] + +plt.ylabel("Ops / seconds") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Operations / second") +ax2 = ax1.twinx() +ax2.set_ylabel("latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor+BAR_WIDTH*i], datas[i]["Ops/sec"], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor*2+BAR_WIDTH*i, factor*3+BAR_WIDTH*i], datas[i]["Latency"], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/mysql/Dockerfile b/benchmark/mysql/Dockerfile new file mode 100644 index 0000000..4089b96 --- /dev/null +++ b/benchmark/mysql/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y sysbench + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/benchmark/mysql/mysql.sh b/benchmark/mysql/mysql.sh new file mode 100755 index 0000000..f805714 --- /dev/null +++ b/benchmark/mysql/mysql.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +set -eu -o pipefail + +MYSQL_VERSION=8.2.0 +MYSQL_IMAGE="mysql:$MYSQL_VERSION" +BENCH_IMAGE="mysql-bench" + +source ~/.profile +cd $(dirname $0) +. ../../test/util.sh +. ../param.bash + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit +sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . +nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . + +sudo nerdctl pull --quiet $MYSQL_IMAGE +nerdctl pull --quiet $MYSQL_IMAGE + +echo "===== Benchmark: mysql rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client + set -ex + + sudo nerdctl run -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + sudo nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(sudo nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-rootful-direct.log + + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql rootful via host =====" +( + set +e + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client + set -ex + + sudo nerdctl run -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + sudo nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-rootful-host.log + + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + set -ex + + nerdctl run -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-wo-b4ns-direct.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + set -ex + + nerdctl run -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-wo-b4ns-host.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-w-b4ns.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-port=3306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-port=3306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/mysql/mysql_multinode.sh b/benchmark/mysql/mysql_multinode.sh new file mode 100755 index 0000000..b3428ff --- /dev/null +++ b/benchmark/mysql/mysql_multinode.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f mysql-server +NAME="test" exec_lxc nerdctl rm -f mysql-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +MYSQL_VERSION=8.2.0 +MYSQL_IMAGE="mysql:$MYSQL_VERSION" +BENCH_IMAGE="mysql-bench" + +set -eux -o pipefail + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/mysql && sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/mysql && nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." + +NAME="test" exec_lxc sudo nerdctl pull --quiet $MYSQL_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MYSQL_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: mysql rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name mysql-client $BENCH_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 30 + NAME="test2" exec_lxc sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f mysql-server + NAME="test2" exec_lxc sudo nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name mysql-client $BENCH_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 30 + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-wo-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f mysql-server + NAME="test2" exec_lxc nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity" + SERVER_IP=$(NAME="test" exec_lxc nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-w-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f mysql-server + NAME="test2" exec_lxc nerdctl rm -f mysql-client +) \ No newline at end of file diff --git a/benchmark/mysql/mysql_plot.py b/benchmark/mysql/mysql_plot.py new file mode 100644 index 0000000..c7aad7e --- /dev/null +++ b/benchmark/mysql/mysql_plot.py @@ -0,0 +1,55 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys +import re + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = re.sub('\s+', ' ', line.strip()).split(" ") + if "transactions:" in line: + data["transactions"] = float(line[2].replace("(", "")) + if "queries:" in line: + data["queries"] = float(line[2].replace("(", "")) + if "avg:" in line: + data["latency(ms)"] = float(line[1]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['transactions', 'queries', 'latency(ms)'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Operations / second") +ax2 = ax1.twinx() +ax2.set_ylabel("Average latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor+BAR_WIDTH*i], [datas[i][labels[0]], datas[i][labels[1]]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor*2+BAR_WIDTH*i], datas[i][labels[2]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/param.bash b/benchmark/param.bash new file mode 100644 index 0000000..9f66510 --- /dev/null +++ b/benchmark/param.bash @@ -0,0 +1,2 @@ +HOST_IP_PREFIX="192.168.6." +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q $HOST_IP_PREFIX; if [ $? -eq 0 ]; then echo $i; fi; done) diff --git a/benchmark/postgres/postgres.sh b/benchmark/postgres/postgres.sh new file mode 100755 index 0000000..6e87e81 --- /dev/null +++ b/benchmark/postgres/postgres.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +set -eu -o pipefail + +POSTGRES_VERSION=16.1 +POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" + +source ~/.profile +cd $(dirname $0) +. ../../test/util.sh +. ../param.bash + +sudo nerdctl pull --quiet $POSTGRES_IMAGE +nerdctl pull --quiet $POSTGRES_IMAGE + +echo "===== Benchmark: postgresql rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client + set -ex + + sudo nerdctl run -d --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + sudo nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(sudo nerdctl exec psql-server hostname -i) + sleep 5 + sudo nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + sudo nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-rootful-direct.log + + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql rootful via host =====" +( + set +e + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client + set -ex + + sudo nerdctl run -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + sudo nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + sleep 5 + sudo nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + sudo nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-rootful-host.log + + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f psql-server + nerdctl rm -f psql-client + set -ex + + nerdctl run -d --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(nerdctl exec psql-server hostname -i) + PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') + NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 5432; do sleep 1; done" + nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-direct.log + + nerdctl rm -f psql-server + nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f psql-server + nerdctl rm -f psql-client + set -ex + + nerdctl run -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + sleep 5 + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-host.log + + nerdctl rm -f psql-server + nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') + NAME="psql-client" exec_netns /bin/bash -c "until nc -z $HOST_IP 15432; do sleep 1; done" + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-w-b4ns.log + + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: postgres client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(nerdctl exec psql-server hostname -i) + sleep 5 + nerdctl exec psql-client pgbench -h $SERVER_IP -p 5432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 5432 -U postgres -s 10 -t 1000 postgres + + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/postgres/postgres_multinode.sh b/benchmark/postgres/postgres_multinode.sh new file mode 100755 index 0000000..d821d0f --- /dev/null +++ b/benchmark/postgres/postgres_multinode.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f psql-server +NAME="test" exec_lxc nerdctl rm -f psql-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +POSTGRES_VERSION=16.1 +POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $POSTGRES_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $POSTGRES_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: postgresql rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name psql-server -e POSTGRES_PASSWORD=pass -d $POSTGRES_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name psql-client -e PGPASSWORD=pass -d $POSTGRES_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + NAME="test2" exec_lxc sudo nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -i postgres + NAME="test2" exec_lxc sudo nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres > postgres-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f psql-server + NAME="test2" exec_lxc sudo nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name psql-server -e POSTGRES_PASSWORD=pass -d $POSTGRES_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name psql-client -e PGPASSWORD=pass -d $POSTGRES_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -i postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres > postgres-multinode-wo-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f psql-server + NAME="test2" exec_lxc nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec psql-server hostname -i) + sleep 5 + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-multinode-w-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f psql-server + NAME="test2" exec_lxc nerdctl rm -f psql-client +) \ No newline at end of file diff --git a/benchmark/postgres/postgres_plot.py b/benchmark/postgres/postgres_plot.py new file mode 100644 index 0000000..3e43a7e --- /dev/null +++ b/benchmark/postgres/postgres_plot.py @@ -0,0 +1,56 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip() + if "latency average" in line: + data["latency (ms)"] = float(line.split(" ")[3]) + if "tps" in line: + data["tps"] = float(line.split(" ")[2]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['tps', 'latency (ms)'] + +plt.ylabel("Request / seconds") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +print(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("transaction / second") +ax2 = ax1.twinx() +ax2.set_ylabel("latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i], datas[i][labels[0]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor+BAR_WIDTH*i], datas[i][labels[1]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/rabbitmq/rabbitmq.sh b/benchmark/rabbitmq/rabbitmq.sh new file mode 100755 index 0000000..7b3a243 --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +RABBITMQ_VERSION=3.12.10 +RABBITMQ_IMAGE="rabbitmq:$RABBITMQ_VERSION" + +PERF_VERSION="2.20.0" +PERF_IMAGE="pivotalrabbitmq/perf-test:$PERF_VERSION" + +source ~/.profile +cd $(dirname $0) +. ../param.bash + +sudo nerdctl pull --quiet $RABBITMQ_IMAGE +sudo nerdctl pull --quiet $PERF_IMAGE +nerdctl pull --quiet $RABBITMQ_IMAGE +nerdctl pull --quiet $PERF_IMAGE + +echo "===== Benchmark: rabbitmq rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f rabbitmq-server + set -ex + + sudo nerdctl run -d --name rabbitmq-server $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(sudo nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-rootful-direct.log" + sudo nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + sudo nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq rootful via host =====" +( + set +e + sudo nerdctl rm -f rabbitmq-server + set -ex + + sudo nerdctl run -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(sudo nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-rootful-host.log" + sudo nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + sudo nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f rabbitmq-server + set -ex + + nerdctl run -d --name rabbitmq-server $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-wo-b4ns-direct.log" + nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f rabbitmq-server + set -ex + + nerdctl run -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-wo-b4ns-host.log" + nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + LOG_NAME="rabbitmq-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 + + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) \ No newline at end of file diff --git a/benchmark/rabbitmq/rabbitmq_multinode.sh b/benchmark/rabbitmq/rabbitmq_multinode.sh new file mode 100755 index 0000000..a1463a0 --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq_multinode.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f rabbitmq-server +NAME="test" exec_lxc nerdctl rm -f rabbitmq-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" + +RABBITMQ_VERSION=3.12.10 +RABBITMQ_IMAGE="rabbitmq:$RABBITMQ_VERSION" + +PERF_VERSION="2.20.0" +PERF_IMAGE="pivotalrabbitmq/perf-test:$PERF_VERSION" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $RABBITMQ_IMAGE +NAME="test" exec_lxc sudo nerdctl pull --quiet $PERF_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $RABBITMQ_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $PERF_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: rabbitmq rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="rabbitmq-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec rabbitmq-client java -jar /perf_test/perf-test.jar --uri amqp://$TEST1_VXLAN_ADDR --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc sudo nerdctl rm -f rabbitmq-client +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="rabbitmq-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec rabbitmq-client java -jar /perf_test/perf-test.jar --uri amqp://$TEST1_VXLAN_ADDR --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc nerdctl rm -f rabbitmq-client +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 5673:5672 --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + sleep 5 + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec rabbitmq-client java -jar /perf_test/perf-test.jar --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc nerdctl rm -f rabbitmq-client +) diff --git a/benchmark/rabbitmq/rabbitmq_plot.py b/benchmark/rabbitmq/rabbitmq_plot.py new file mode 100644 index 0000000..213a1e5 --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq_plot.py @@ -0,0 +1,46 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip() + if "sending rate avg" in line: + data["Sending"] = int(line.split(" ")[5]) + if "receiving rate avg" in line: + data["Receiving"] = int(line.split(" ")[5]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['Sending', 'Receiving'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("messages / second") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor + BAR_WIDTH*i], [datas[i][labels[0]], datas[i][labels[0]]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/redis/redis.sh b/benchmark/redis/redis.sh new file mode 100755 index 0000000..36f73e7 --- /dev/null +++ b/benchmark/redis/redis.sh @@ -0,0 +1,126 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + +REDIS_VERSION=7.2.3 +REDIS_IMAGE="redis:${REDIS_VERSION}" + +source ~/.profile +. ../param.bash + +sudo nerdctl pull --quiet $REDIS_IMAGE +nerdctl pull --quiet $REDIS_IMAGE + +echo "===== Benchmark: redis rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client + set -ex + + sudo nerdctl run -d --name redis-server "${REDIS_IMAGE}" + sudo nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(sudo nerdctl exec redis-server hostname -i) + sudo nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-rootful-direct.log + cat redis-rootful-direct.log + + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis rootful via host =====" +( + set +e + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client + set -ex + + sudo nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" + sudo nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + sudo nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-rootful-host.log + cat redis-rootful-host.log + + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f redis-server + nerdctl rm -f redis-client + set -ex + + nerdctl run -d --name redis-server "${REDIS_IMAGE}" + nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(nerdctl exec redis-server hostname -i) + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-wo-b4ns-direct.log + cat redis-wo-b4ns-direct.log + + nerdctl rm -f redis-server + nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f redis-server + nerdctl rm -f redis-client + set -ex + + nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" + nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-wo-b4ns-host.log + cat redis-wo-b4ns-host.log + + nerdctl rm -f redis-server + nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity + nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-w-b4ns.log + cat redis-w-b4ns.log + + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity + SERVER_IP=$(nerdctl exec redis-server hostname -i) + # without 'sleep 1', benchmark is not performed.(race condition?) + nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP -p 6379 --csv" + + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/redis/redis_multinode.sh b/benchmark/redis/redis_multinode.sh new file mode 100755 index 0000000..21dd291 --- /dev/null +++ b/benchmark/redis/redis_multinode.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../test/util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f redis-server +NAME="test" exec_lxc nerdctl rm -f redis-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +REDIS_VERSION=7.2.3 +REDIS_IMAGE="redis:${REDIS_VERSION}" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $REDIS_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $REDIS_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: redis rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name redis-server -d $REDIS_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc sudo nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f redis-server + NAME="test2" exec_lxc sudo nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-server -d $REDIS_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-wo-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f redis-server + NAME="test2" exec_lxc nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec redis-server hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" > redis-multinode-w-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f redis-server + NAME="test2" exec_lxc nerdctl rm -f redis-client +) diff --git a/benchmark/redis/redis_plot.py b/benchmark/redis/redis_plot.py new file mode 100644 index 0000000..5b3b777 --- /dev/null +++ b/benchmark/redis/redis_plot.py @@ -0,0 +1,38 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + reader = csv.reader(f) + next(reader, None) + for row in reader: + data[row[0]] = float(row[1]) + return data + +BAR_WIDTH=0.25 + +labels_for_data=['PING_INLINE', 'PING_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH (needed to benchmark LRANGE)', 'LRANGE_100 (first 100 elements)', 'LRANGE_300 (first 300 elements)', 'LRANGE_500 (first 500 elements)', 'LRANGE_600 (first 600 elements)', 'MSET (10 keys)', 'XADD'] +labels=['PING\n_INLINE', 'PING\n_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH', 'LRANGE\n_100', 'LRANGE\n_300', 'LRANGE\n_500', 'LRANGE\n_600', 'MSET\n(10 keys)', 'XADD'] + +plt.rcParams["figure.figsize"] = (20,4) +plt.ylabel("Request / seconds") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data_csv = load_data(filename) + value = [] + for l in labels_for_data: + value.append(data_csv[l]) + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +plt.legend() +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) diff --git a/benchmark/run_bench.sh b/benchmark/run_bench.sh new file mode 100755 index 0000000..9ed00cd --- /dev/null +++ b/benchmark/run_bench.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -e + +cd $(dirname $0) + +BENCHMARKS=(iperf3 block redis memcached etcd rabbitmq mysql postgres) + +for BENCH in ${BENCHMARKS[@]}; do + pushd $BENCH + ./${BENCH}.sh + python3 ${BENCH}_plot.py $BENCH-rootful-direct.log $BENCH-rootful-host.log $BENCH-wo-b4ns-direct.log $BENCH-wo-b4ns-host.log $BENCH-w-b4ns.log ../$BENCH.png + popd +done \ No newline at end of file diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index b26b3d1..32c2a6e 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -13,6 +13,7 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/bypass4netns" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" "github.com/rootless-containers/bypass4netns/pkg/oci" pkgversion "github.com/rootless-containers/bypass4netns/pkg/version" seccomp "github.com/seccomp/libseccomp-golang" @@ -22,11 +23,14 @@ import ( ) var ( - socketFile string - pidFile string - logFilePath string - readyFd int - exitFd int + socketFile string + comSocketFile string + pidFile string + logFilePath string + multinodeEtcdAddress string + multinodeHostAddress string + readyFd int + exitFd int ) func main() { @@ -37,8 +41,11 @@ func main() { } flag.StringVar(&socketFile, "socket", filepath.Join(xdgRuntimeDir, oci.SocketName), "Socket file") + flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") + flag.StringVar(&multinodeEtcdAddress, "multinode-etcd-address", "", "Etcd address for multinode communication") + flag.StringVar(&multinodeHostAddress, "multinode-host-address", "", "Host address for multinode communication") flag.IntVar(&readyFd, "ready-fd", -1, "File descriptor to notify when ready") flag.IntVar(&exitFd, "exit-fd", -1, "File descriptor for terminating bypass4netns") ignoredSubnets := flag.StringSlice("ignore", []string{"127.0.0.0/8"}, "Subnets to ignore in bypass4netns. Can be also set to \"auto\".") @@ -46,7 +53,12 @@ func main() { debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") - nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide + nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide + tracerAgentFlag := flag.Bool("tracer-agent", false, "(An internal flag. Do not use manually.)") // TODO: hide + memNSEnterPid := flag.Int("mem-nsenter-pid", -1, "(An internal flag. Do not use manually.)") // TODO: hide + handleC2cEnable := flag.Bool("handle-c2c-connections", false, "Handle connections between containers") + tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") + multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") // Parse arguments flag.Parse() @@ -74,6 +86,24 @@ func main() { os.Exit(0) } + if *memNSEnterPid > 0 { + logrus.SetOutput(os.Stdout) + if err := bypass4netns.OpenMemWithNSEnterAgent(uint32(*memNSEnterPid)); err != nil { + logrus.Fatal(err) + } + os.Exit(0) + } + + if logFilePath != "" { + logFile, err := os.Create(logFilePath) + if err != nil { + logrus.Fatalf("Cannnot write log file %s : %v", logFilePath, err) + } + defer logFile.Close() + logrus.SetOutput(io.MultiWriter(os.Stderr, logFile)) + logrus.Infof("LogFilePath: %s", logFilePath) + } + if *nsagentFlag { if err := nsagent.Main(); err != nil { logrus.Fatal(err) @@ -81,6 +111,35 @@ func main() { os.Exit(0) } + if *tracerAgentFlag { + if err := tracer.Main(); err != nil { + logrus.Fatal(err) + } + os.Exit(0) + } + + if *handleC2cEnable { + if comSocketFile == "" { + logrus.Fatal("--com-socket is not specified") + } + } + + if *tracerEnable { + if !*handleC2cEnable { + logrus.Fatal("--handle-c2c-connections is not enabled") + } + } + + if *multinodeEnable { + if multinodeEtcdAddress == "" { + logrus.Fatal("--multinode-etcd-address is not specified") + } + if multinodeHostAddress == "" { + logrus.Fatal("--multinode-host-address is not specified") + } + logrus.WithFields(logrus.Fields{"etcdAddress": multinodeEtcdAddress, "hostAddress": multinodeHostAddress}).Infof("Multinode communication is enabled.") + } + if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) { logrus.Fatalf("Cannot cleanup socket file: %v", err) } @@ -93,19 +152,9 @@ func main() { logrus.Infof("PidFilePath: %s", pidFile) } - if logFilePath != "" { - logFile, err := os.Create(logFilePath) - if err != nil { - logrus.Fatalf("Cannnot write log file %s : %v", logFilePath, err) - } - defer logFile.Close() - logrus.SetOutput(io.MultiWriter(os.Stderr, logFile)) - logrus.Infof("LogFilePath: %s", logFilePath) - } - logrus.Infof("SocketPath: %s", socketFile) - handler := bypass4netns.NewHandler(socketFile) + handler := bypass4netns.NewHandler(socketFile, comSocketFile, strings.Replace(logFilePath, ".log", "-tracer.log", -1)) subnets := []net.IPNet{} var subnetsAuto bool @@ -196,5 +245,14 @@ func main() { os.Exit(0) }() - handler.StartHandle() + c2cConfig := &bypass4netns.C2CConnectionHandleConfig{ + Enable: *handleC2cEnable, + TracerEnable: *tracerEnable, + } + multinode := &bypass4netns.MultinodeConfig{ + Enable: *multinodeEnable, + EtcdAddress: multinodeEtcdAddress, + HostAddress: multinodeHostAddress, + } + handler.StartHandle(c2cConfig, multinode) } diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index 3b1aa7c..9e2aed2 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/gorilla/mux" + "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/api/daemon/router" "github.com/rootless-containers/bypass4netns/pkg/bypass4netnsd" pkgversion "github.com/rootless-containers/bypass4netns/pkg/version" @@ -20,10 +21,13 @@ import ( ) var ( - socketFile string - pidFile string - logFilePath string - b4nnPath string + socketFile string + comSocketFile string // socket for channel with bypass4netns + pidFile string + logFilePath string + b4nnPath string + multinodeEtcdAddress string + multinodeHostAddress string ) func main() { @@ -39,9 +43,15 @@ func main() { defaultB4nnPath := filepath.Join(filepath.Dir(exePath), "bypass4netns") flag.StringVar(&socketFile, "socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd.sock"), "Socket file") + flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.StringVar(&b4nnPath, "b4nn-executable", defaultB4nnPath, "Path to bypass4netns executable") + flag.StringVar(&multinodeEtcdAddress, "multinode-etcd-address", "", "Etcd address for multinode communication") + flag.StringVar(&multinodeHostAddress, "multinode-host-address", "", "Host address for multinode communication") + tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") + handleC2cEnable := flag.Bool("handle-c2c-connections", false, "Handle connections between containers") + multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") @@ -75,6 +85,11 @@ func main() { } logrus.Infof("SocketPath: %s", socketFile) + if err := os.Remove(comSocketFile); err != nil && !errors.Is(err, os.ErrNotExist) { + logrus.Fatalf("Cannot cleanup communication socket file: %v", err) + } + logrus.Infof("CommunicationSocketPath: %s", comSocketFile) + if pidFile != "" { pid := fmt.Sprintf("%d", os.Getpid()) if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil { @@ -98,15 +113,64 @@ func main() { } logrus.Infof("bypass4netns executable path: %s", b4nnPath) - err = listenServeAPI(socketFile, &router.Backend{ - BypassDriver: bypass4netnsd.NewDriver(b4nnPath), - }) - if err != nil { - logrus.Fatalf("failed to serve API: %s", err) + b4nsdDriver := bypass4netnsd.NewDriver(b4nnPath, comSocketFile) + + if *handleC2cEnable && *multinodeEnable { + logrus.Fatal("--handle-c2c-connections and multinode cannot be enabled at the sametime") + } + + if *handleC2cEnable { + logrus.Info("Handling connections between containers") + b4nsdDriver.HandleC2CEnable = *handleC2cEnable + } + + if *tracerEnable { + if !*handleC2cEnable { + logrus.Fatal("--handle-c2c-connections is not enabled") + } + logrus.Info("Connection tracer is enabled") + b4nsdDriver.TracerEnable = *tracerEnable + } + + if *multinodeEnable { + if multinodeEtcdAddress == "" { + logrus.Fatal("--multinode-etcd-address is not specified") + } + if multinodeHostAddress == "" { + logrus.Fatal("--multinode-host-address is not specified") + } + b4nsdDriver.MultinodeEnable = *multinodeEnable + b4nsdDriver.MultinodeEtcdAddress = multinodeEtcdAddress + b4nsdDriver.MultinodeHostAddress = multinodeHostAddress + logrus.WithFields(logrus.Fields{"etcdAddress": multinodeEtcdAddress, "hostAddress": multinodeHostAddress}).Info("Multinode communication is enabled.") } + + waitChan := make(chan bool) + go func() { + err = listenServeNerdctlAPI(socketFile, &router.Backend{ + BypassDriver: b4nsdDriver, + }) + if err != nil { + logrus.Fatalf("failed to serve nerdctl API: %q", err) + } + waitChan <- true + }() + + go func() { + err = listenServeBypass4netnsAPI(comSocketFile, &com.Backend{ + BypassDriver: b4nsdDriver, + }) + if err != nil { + logrus.Fatalf("failed to serve bypass4netns: %q", err) + } + waitChan <- true + }() + + <-waitChan + logrus.Fatalf("process exited") } -func listenServeAPI(socketPath string, backend *router.Backend) error { +func listenServeNerdctlAPI(socketPath string, backend *router.Backend) error { r := mux.NewRouter() router.AddRoutes(r, backend) srv := &http.Server{Handler: r} @@ -118,6 +182,22 @@ func listenServeAPI(socketPath string, backend *router.Backend) error { if err != nil { return err } - logrus.Infof("Starting to serve on %s", socketPath) + logrus.Infof("Starting nerdctl API to serve on %s", socketPath) + return srv.Serve(l) +} + +func listenServeBypass4netnsAPI(sockPath string, backend *com.Backend) error { + r := mux.NewRouter() + com.AddRoutes(r, backend) + srv := &http.Server{Handler: r} + err := os.RemoveAll(sockPath) + if err != nil { + return err + } + l, err := net.Listen("unix", sockPath) + if err != nil { + return err + } + logrus.Infof("Starting bypass4netns API to serve on %s", sockPath) return srv.Serve(l) } diff --git a/cmd/bypass4netnsd/main_test.go b/cmd/bypass4netnsd/main_test.go index a6fa53f..bf28716 100644 --- a/cmd/bypass4netnsd/main_test.go +++ b/cmd/bypass4netnsd/main_test.go @@ -2,18 +2,20 @@ package main import ( "context" + "net" "os" "path/filepath" "syscall" "testing" "github.com/rootless-containers/bypass4netns/pkg/api" + "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/api/daemon/client" "github.com/stretchr/testify/assert" ) // Start bypass4netnsd before testing -func TestBypass4netnsd(t *testing.T) { +func TestNerdctlAPI(t *testing.T) { xdgRuntimeDir := os.Getenv("XDG_RUNTIME_DIR") if xdgRuntimeDir == "" { panic("$XDG_RUNTIME_DIR needs to be set") @@ -57,3 +59,75 @@ func isProcessRunning(pid int) bool { return err == nil } + +func TestBypass4netnsAPI(t *testing.T) { + xdgRuntimeDir := os.Getenv("XDG_RUNTIME_DIR") + if xdgRuntimeDir == "" { + panic("$XDG_RUNTIME_DIR needs to be set") + } + client, err := com.NewComClient(filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock")) + if err != nil { + t.Fatalf("failed to create ComClient %q", err) + } + + mac, err := net.ParseMAC("ea:1e:d5:cd:e2:ea") + assert.Equal(t, nil, err) + ip, ipNet, err := net.ParseCIDR("10.4.0.53/24") + assert.Equal(t, nil, err) + ipNet.IP = ip + cid := "c70ae35d2aeb4c98c5ef9eb4" + containerIf := com.ContainerInterfaces{ + ContainerID: cid, + Interfaces: []com.Interface{ + { + Name: "eth0", + HWAddr: mac, + Addresses: []net.IPNet{*ipNet}, + IsLoopback: false, + }, + }, + ForwardingPorts: map[int]int{ + 5201: 5202, + }, + } + err = client.Ping(context.TODO()) + assert.Equal(t, nil, err) + + ifs, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 0, len(ifs)) + + // this should be error + _, err = client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.NotEqual(t, nil, err) + + // Registering interface + postedIfs, err := client.PostInterface(context.TODO(), &containerIf) + assert.Equal(t, nil, err) + assert.Equal(t, postedIfs.ContainerID, containerIf.ContainerID) + assert.Equal(t, postedIfs.Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + + ifs2, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 1, len(ifs2)) + assert.Equal(t, ifs2[cid].ContainerID, containerIf.ContainerID) + assert.Equal(t, ifs2[cid].Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + assert.Equal(t, ifs2[cid].ForwardingPorts[5201], 5202) + + ifs3, err := client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.Equal(t, nil, err) + assert.Equal(t, ifs3.ContainerID, containerIf.ContainerID) + assert.Equal(t, ifs3.Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + assert.Equal(t, ifs3.ForwardingPorts[5201], 5202) + + // Removing interface + err = client.DeleteInterface(context.TODO(), containerIf.ContainerID) + assert.Equal(t, nil, err) + + ifs4, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 0, len(ifs4)) + + _, err = client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.NotEqual(t, nil, err) +} diff --git a/go.mod b/go.mod index dd4ccd3..8df6a9d 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,12 @@ require ( ) require ( + github.com/coreos/etcd v3.3.27+incompatible // indirect + github.com/coreos/go-semver v0.3.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c12d135..bfa3afd 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,19 @@ +github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA= +github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/opencontainers/runtime-spec v1.0.3-0.20220809190508-9ee22abf867e h1:UcO9GCY5ehlR0PK20BXQ+nlb8J2LNXlwm97PryxbIek= github.com/opencontainers/runtime-spec v1.0.3-0.20220809190508-9ee22abf867e/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/oraoto/go-pidfd v0.1.2-0.20210402155345-46bf1ba22e22 h1:TBw1Dwr/0eRvVIhdgQ+qGQuJ2STNL1+bjaI7nKLCoiQ= @@ -17,12 +28,15 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/vtolstov/go-ioctl v0.0.0-20151206205506-6be9cced4810 h1:X6ps8XHfpQjw8dUStzlMi2ybiKQ2Fmdw7UM+TinwvyM= github.com/vtolstov/go-ioctl v0.0.0-20151206205506-6be9cced4810/go.mod h1:dF0BBJ2YrV1+2eAIyEI+KeSidgA6HqoIP1u5XTlMq/o= +go.etcd.io/etcd v3.3.27+incompatible h1:5hMrpf6REqTHV2LW2OclNpRtxI0k9ZplMemJsMSWju0= +go.etcd.io/etcd v3.3.27+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220818161305-2296e01440c6 h1:Sx/u41w+OwrInGdEckYmEuU5gHoGSL4QbDz3S9s6j4U= golang.org/x/sys v0.0.0-20220818161305-2296e01440c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/api/com/api.go b/pkg/api/com/api.go new file mode 100644 index 0000000..5a3a916 --- /dev/null +++ b/pkg/api/com/api.go @@ -0,0 +1,18 @@ +package com + +import ( + "net" +) + +type ContainerInterfaces struct { + ContainerID string `json:"containerID"` + Interfaces []Interface `json:"interfaces"` + // key is "container-side" port, value is host-side port + ForwardingPorts map[int]int `json:"forwardingPorts"` +} +type Interface struct { + Name string `json:"name"` + HWAddr net.HardwareAddr `json:"hwAddr"` + Addresses []net.IPNet `json:"addresses"` + IsLoopback bool `json:"isLoopback"` +} diff --git a/pkg/api/com/client.go b/pkg/api/com/client.go new file mode 100644 index 0000000..6eae247 --- /dev/null +++ b/pkg/api/com/client.go @@ -0,0 +1,227 @@ +// This code is copied from https://github.com/rootless-containers/rootlesskit/blob/master/pkg/api/client/client.go v0.14.6 +// The code is licensed under Apache-2.0 + +package com + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + + "github.com/rootless-containers/bypass4netns/pkg/api" +) + +type ComClient struct { + client *http.Client + version string + dummyHost string +} + +func NewComClient(socketPath string) (*ComClient, error) { + if _, err := os.Stat(socketPath); err != nil { + return nil, err + } + hc := &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + var d net.Dialer + return d.DialContext(ctx, "unix", socketPath) + }, + }, + } + + return &ComClient{ + client: hc, + version: "v1", + dummyHost: "bypass4netnsd-com", + }, nil +} + +func readAtMost(r io.Reader, maxBytes int) ([]byte, error) { + lr := &io.LimitedReader{ + R: r, + N: int64(maxBytes), + } + b, err := io.ReadAll(lr) + if err != nil { + return b, err + } + if lr.N == 0 { + return b, fmt.Errorf("expected at most %d bytes, got more", maxBytes) + } + return b, nil +} + +// HTTPStatusErrorBodyMaxLength specifies the maximum length of HTTPStatusError.Body +const HTTPStatusErrorBodyMaxLength = 64 * 1024 + +// HTTPStatusError is created from non-2XX HTTP response +type HTTPStatusError struct { + // StatusCode is non-2XX status code + StatusCode int + // Body is at most HTTPStatusErrorBodyMaxLength + Body string +} + +// Error implements error. +// If e.Body is a marshalled string of api.ErrorJSON, Error returns ErrorJSON.Message . +// Otherwise Error returns a human-readable string that contains e.StatusCode and e.Body. +func (e *HTTPStatusError) Error() string { + if e.Body != "" && len(e.Body) < HTTPStatusErrorBodyMaxLength { + var ej api.ErrorJSON + if json.Unmarshal([]byte(e.Body), &ej) == nil { + return ej.Message + } + } + return fmt.Sprintf("unexpected HTTP status %s, body=%q", http.StatusText(e.StatusCode), e.Body) +} + +func successful(resp *http.Response) error { + if resp == nil { + return errors.New("nil response") + } + if resp.StatusCode/100 != 2 { + b, _ := readAtMost(resp.Body, HTTPStatusErrorBodyMaxLength) + return &HTTPStatusError{ + StatusCode: resp.StatusCode, + Body: string(b), + } + } + return nil +} + +func (c *ComClient) Ping(ctx context.Context) error { + m, err := json.Marshal("ping") + if err != nil { + return err + } + u := fmt.Sprintf("http://%s/%s/ping", c.dummyHost, c.version) + req, err := http.NewRequest("GET", u, bytes.NewReader(m)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return err + } + dec := json.NewDecoder(resp.Body) + var pong string + if err := dec.Decode(&pong); err != nil { + return err + } + + if pong != "pong" { + return fmt.Errorf("unexpected response expected=%q actual=%q", "pong", pong) + } + return nil +} + +func (c *ComClient) ListInterfaces(ctx context.Context) (map[string]ContainerInterfaces, error) { + u := fmt.Sprintf("http://%s/%s/interfaces", c.dummyHost, c.version) + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs map[string]ContainerInterfaces + if err := dec.Decode(&containerIfs); err != nil { + return nil, err + } + + return containerIfs, nil +} + +func (c *ComClient) GetInterface(ctx context.Context, id string) (*ContainerInterfaces, error) { + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, id) + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs ContainerInterfaces + if err := dec.Decode(&containerIfs); err != nil { + return nil, err + } + + return &containerIfs, nil +} + +func (c *ComClient) PostInterface(ctx context.Context, ifs *ContainerInterfaces) (*ContainerInterfaces, error) { + m, err := json.Marshal(ifs) + if err != nil { + return nil, err + } + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, ifs.ContainerID) + req, err := http.NewRequest("POST", u, bytes.NewReader(m)) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs ContainerInterfaces + if err := dec.Decode(&containerIfs); err != nil { + return nil, err + } + + return &containerIfs, nil +} + +func (c *ComClient) DeleteInterface(ctx context.Context, id string) error { + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, id) + req, err := http.NewRequest("DELETE", u, nil) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return err + } + return nil +} diff --git a/pkg/api/com/router.go b/pkg/api/com/router.go new file mode 100644 index 0000000..0cbe188 --- /dev/null +++ b/pkg/api/com/router.go @@ -0,0 +1,123 @@ +package com + +import ( + "encoding/json" + "errors" + "net/http" + + "github.com/gorilla/mux" + "github.com/rootless-containers/bypass4netns/pkg/api" +) + +type Backend struct { + BypassDriver BypassDriver +} + +type BypassDriver interface { + ListInterfaces() map[string]ContainerInterfaces + GetInterface(id string) *ContainerInterfaces + PostInterface(id string, containerIfs *ContainerInterfaces) + DeleteInterface(id string) +} + +func AddRoutes(r *mux.Router, b *Backend) { + v1 := r.PathPrefix("/v1").Subrouter() + _ = v1 + v1.Path("/ping").Methods("GET").HandlerFunc(b.ping) + v1.Path("/interfaces").Methods("GET").HandlerFunc(b.listInterfaces) + v1.Path("/interface/{id}").Methods("GET").HandlerFunc(b.getInterface) + v1.Path("/interface/{id}").Methods("POST").HandlerFunc(b.postInterface) + v1.Path("/interface/{id}").Methods("DELETE").HandlerFunc(b.deleteInterface) +} + +func (b *Backend) onError(w http.ResponseWriter, r *http.Request, err error, ec int) { + w.WriteHeader(ec) + w.Header().Set("Content-Type", "application/json") + // it is safe to return the err to the client, because the client is reliable + e := api.ErrorJSON{ + Message: err.Error(), + } + _ = json.NewEncoder(w).Encode(e) +} + +func (b *Backend) ping(w http.ResponseWriter, r *http.Request) { + m, err := json.Marshal("pong") + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) listInterfaces(w http.ResponseWriter, r *http.Request) { + ifs := b.BypassDriver.ListInterfaces() + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) getInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + ifs := b.BypassDriver.GetInterface(id) + if ifs == nil { + b.onError(w, r, errors.New("not found"), http.StatusNotFound) + return + } + + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) postInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + decoder := json.NewDecoder(r.Body) + var containerIfs ContainerInterfaces + if err := decoder.Decode(&containerIfs); err != nil { + b.onError(w, r, err, http.StatusBadRequest) + return + } + b.BypassDriver.PostInterface(id, &containerIfs) + + ifs := b.BypassDriver.GetInterface(id) + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) deleteInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + b.BypassDriver.DeleteInterface(id) +} diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index f6de96e..9ea628e 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -6,21 +6,32 @@ package bypass4netns import ( "bytes" gocontext "context" - "encoding/binary" "encoding/json" "errors" "fmt" "net" + "os" + "os/exec" + "strconv" + "strings" "syscall" + "time" + "github.com/coreos/etcd/client" "github.com/opencontainers/runtime-spec/specs-go" "github.com/oraoto/go-pidfd" + "github.com/rootless-containers/bypass4netns/pkg/api/com" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/iproute2" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nonbypassable" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" + "github.com/rootless-containers/bypass4netns/pkg/util" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) +const ETCD_MULTINODE_PREFIX = "bypass4netns/multinode/" + func closeStateFds(recvFds []int) { for i := range recvFds { unix.Close(i) @@ -70,14 +81,13 @@ func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) { } // readProcMem read data from memory of specified pid process at the spcified offset. -func readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { +func (h *notifHandler) readProcMem(pid int, offset uint64, len uint64) ([]byte, error) { buffer := make([]byte, len) // PATH_MAX - memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDONLY, 0o777) + memfd, err := h.openMem(pid) if err != nil { return nil, err } - defer unix.Close(memfd) size, err := unix.Pread(memfd, buffer, int64(offset)) if err != nil { @@ -87,6 +97,134 @@ func readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { return buffer[:size], nil } +// writeProcMem writes data to memory of specified pid process at the specified offset. +func (h *notifHandler) writeProcMem(pid int, offset uint64, buf []byte) error { + memfd, err := h.openMem(pid) + if err != nil { + return err + } + + size, err := unix.Pwrite(memfd, buf, int64(offset)) + if err != nil { + return err + } + if len(buf) != size { + return fmt.Errorf("data is not written successfully. expected size=%d actual size=%d", len(buf), size) + } + + return nil +} + +func (h *notifHandler) openMem(pid int) (int, error) { + if memfd, ok := h.memfds[pid]; ok { + return memfd, nil + } + memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDWR, 0o777) + if err != nil { + logrus.WithField("pid", pid).Warn("failed to open mem due to permission error. retrying with agent.") + newMemfd, err := openMemWithNSEnter(pid) + if err != nil { + return 0, fmt.Errorf("failed to open mem with agent (pid=%d)", pid) + } + logrus.WithField("pid", pid).Info("succeeded to open mem with agent. continue to process") + memfd = newMemfd + } + h.memfds[pid] = memfd + + return memfd, nil +} + +func openMemWithNSEnter(pid int) (int, error) { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + return 0, err + } + + // configure timeout + timeout := &syscall.Timeval{ + Sec: 0, + Usec: 500 * 1000, + } + err = syscall.SetsockoptTimeval(fds[0], syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, timeout) + if err != nil { + return 0, fmt.Errorf("failed to set receive timeout") + } + err = syscall.SetsockoptTimeval(fds[1], syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, timeout) + if err != nil { + return 0, fmt.Errorf("failed to set send timeout") + } + + fd1File := os.NewFile(uintptr(fds[0]), "") + defer fd1File.Close() + fd1Conn, err := net.FileConn(fd1File) + if err != nil { + return 0, err + } + _ = fd1Conn + + selfExe, err := os.Executable() + if err != nil { + return 0, err + } + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return 0, err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(int(pid)), + "-F", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(int(pid), selfPid) + if err != nil { + return 0, fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", selfExe, fmt.Sprintf("--mem-nsenter-pid=%d", pid)) + cmd := exec.CommandContext(gocontext.TODO(), nsenter, nsenterFlags...) + cmd.ExtraFiles = []*os.File{os.NewFile(uintptr(fds[1]), "")} + stdout := bytes.Buffer{} + cmd.Stdout = &stdout + err = cmd.Start() + if err != nil { + return 0, fmt.Errorf("failed to exec mem open agent %q", err) + } + memfd, recvMsgs, err := util.RecvMsg(fd1Conn) + if err != nil { + logrus.Infof("stdout=%q", stdout.String()) + return 0, fmt.Errorf("failed to receive message") + } + logrus.Debugf("recvMsgs=%s", string(recvMsgs)) + err = cmd.Wait() + if err != nil { + return 0, err + } + + return memfd, nil +} + +func OpenMemWithNSEnterAgent(pid uint32) error { + // fd 3 should be passed socket pair + fdFile := os.NewFile(uintptr(3), "") + defer fdFile.Close() + fdConn, err := net.FileConn(fdFile) + if err != nil { + logrus.WithError(err).Fatal("failed to open conn") + } + memPath := fmt.Sprintf("/proc/%d/mem", pid) + memfd, err := unix.Open(memPath, unix.O_RDWR, 0o777) + if err != nil { + logrus.WithError(err).Fatalf("failed to open %s", memPath) + } + err = util.SendMsg(fdConn, memfd, []byte(fmt.Sprintf("opened %s", memPath))) + if err != nil { + logrus.WithError(err).Fatal("failed to send message") + } + return nil +} + func handleNewMessage(sockfd int) (uintptr, *specs.ContainerProcessState, error) { const maxNameLen = 4096 stateBuf := make([]byte, maxNameLen) @@ -141,15 +279,74 @@ type context struct { resp *libseccomp.ScmpNotifResp } -// getFdInProcess get the file descriptor in other process -func getFdInProcess(pid, targetFd int) (int, error) { +func (h *notifHandler) getPidFdInfo(pid int) (*pidInfo, error) { + // retrieve pidfd from cache + if pidfd, ok := h.pidInfos[pid]; ok { + return &pidfd, nil + } + targetPidfd, err := pidfd.Open(int(pid), 0) + if err == nil { + info := pidInfo{ + pidType: PROCESS, + pidfd: targetPidfd, + tgid: pid, // process's pid is equal to its tgid + } + h.pidInfos[pid] = info + return &info, nil + } + + // pid can be thread and pidfd_open fails with thread's pid. + // retrieve process's pid (tgid) from /proc//status and retry to get pidfd with the tgid. + logrus.Warnf("pidfd Open failed: pid=%d err=%q, this pid maybe thread and retrying with tgid", pid, err) + st, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid)) + if err != nil { + return nil, fmt.Errorf("failed to read %d's status err=%q", pid, err) + } + + nextTgid := -1 + for _, s := range strings.Split(string(st), "\n") { + if strings.Contains(s, "Tgid") { + tgids := strings.Split(s, "\t") + if len(tgids) < 2 { + return nil, fmt.Errorf("unexpected /proc/%d/status len=%q status=%q", pid, len(tgids), string(st)) + } + tgid, err := strconv.Atoi(tgids[1]) + if err != nil { + return nil, fmt.Errorf("unexpected /proc/%d/status err=%q status=%q", pid, err, string(st)) + } + nextTgid = tgid + } + if nextTgid > 0 { + break + } + } + if nextTgid < 0 { + logrus.Errorf("cannot get Tgid from /proc/%d/status status=%q", pid, string(st)) + } + targetPidfd, err = pidfd.Open(nextTgid, 0) + if err != nil { + return nil, fmt.Errorf("pidfd Open failed with Tgid: pid=%d %s", nextTgid, err) + } + + logrus.Infof("successfully got pidfd for pid=%d tgid=%d", pid, nextTgid) + info := pidInfo{ + pidType: THREAD, + pidfd: targetPidfd, + tgid: nextTgid, + } + h.pidInfos[pid] = info + return &info, nil +} + +// getFdInProcess get the file descriptor in other process +func (h *notifHandler) getFdInProcess(pid, targetFd int) (int, error) { + targetPidfd, err := h.getPidFdInfo(pid) if err != nil { return 0, fmt.Errorf("pidfd Open failed: %s", err) } - defer syscall.Close(int(targetPidfd)) - fd, err := targetPidfd.GetFd(targetFd, 0) + fd, err := targetPidfd.pidfd.GetFd(targetFd, 0) if err != nil { return 0, fmt.Errorf("pidfd GetFd failed: %s", err) } @@ -179,446 +376,193 @@ func getSocketArgs(sockfd int) (int, int, int, error) { return sock_domain, sock_type, sock_protocol, nil } -// duplicateSocketOnHost duplicate socket in other process to socket on host. -// retun values are (duplicated socket fd, target socket fd in current process, error) -func duplicateSocketOnHost(pid int, _sockfd int) (int, int, error) { - sockfd, err := getFdInProcess(pid, _sockfd) - if err != nil { - return 0, 0, fmt.Errorf("failed to get fd %s", err) - } - - sock_domain, sock_type, sock_protocol, err := getSocketArgs(sockfd) - if err != nil { - return 0, 0, fmt.Errorf("failed to get socket args %s", err) - } - - switch sock_domain { - case syscall.AF_INET, syscall.AF_INET6: - default: - return 0, 0, fmt.Errorf("expected AF_INET or AF_INET6, got %d", sock_domain) - } - - // only SOCK_STREAM and SOCK_DGRAM are acceptable. - if sock_type != syscall.SOCK_STREAM && sock_type != syscall.SOCK_DGRAM { - return 0, 0, fmt.Errorf("SOCK_STREAM and SOCK_DGRAM are supported") - } - - sockfd2, err := syscall.Socket(sock_domain, sock_type, sock_protocol) - if err != nil { - return 0, 0, fmt.Errorf("socket failed: %s", err) - } - - return sockfd2, sockfd, nil -} - -func readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockaddr, error) { - buf, err := readProcMem(pid, offset, addrlen) +func (h *notifHandler) readSockaddrFromProcess(pid int, offset uint64, addrlen uint64) (*sockaddr, error) { + buf, err := h.readProcMem(pid, offset, addrlen) if err != nil { return nil, fmt.Errorf("failed readProcMem pid %v offset 0x%x: %s", pid, offset, err) } return newSockaddr(buf) } -// manageSocket manages socketStatus and return next injecting file descriptor -// return values are (continue?, injecting fd) -func (h *notifHandler) manageSocket(destAddr net.IP, pid int, sockfd int, logger *logrus.Entry) (bool, int) { - destIsIgnored := h.nonBypassable.Contains(destAddr) - key := fmt.Sprintf("%d:%d", pid, sockfd) - sockStatus, ok := h.socketInfo.status[key] +func (h *notifHandler) registerSocket(pid int, sockfd int, syscallName string) (*socketStatus, error) { + logger := logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd, "syscall": syscallName}) + proc, ok := h.processes[pid] if !ok { - if destIsIgnored { - // the socket has never been bypassed and no need to bypass - logger.Debugf("%s is ignored, skipping.", destAddr.String()) - return false, 0 - } else { - // the socket has never been bypassed and need to bypass - sockfd2, sockfd, err := duplicateSocketOnHost(pid, sockfd) - if err != nil { - logger.Errorf("duplicating socket failed: %s", err) - return false, 0 - } - - sockStatus := socketStatus{ - state: Bypassed, - fdInNetns: sockfd, - fdInHost: sockfd2, - } - h.socketInfo.status[key] = sockStatus - logger.Debugf("start to bypass fdInHost=%d fdInNetns=%d", sockStatus.fdInHost, sockStatus.fdInNetns) - return true, sockfd2 - } - } else { - if sockStatus.state == Bypassed { - if !destIsIgnored { - // the socket has been bypassed and continue to be bypassed - logger.Debugf("continue to bypass") - return false, 0 - } else { - // the socket has been bypassed and need to switch back to socket in netns - logger.Debugf("switchback fdInHost(%d) -> fdInNetns(%d)", sockStatus.fdInHost, sockStatus.fdInNetns) - sockStatus.state = SwitchBacked - - h.socketInfo.status[key] = sockStatus - return true, sockStatus.fdInNetns - } - } else if sockStatus.state == SwitchBacked { - if destIsIgnored { - // the socket has been switchbacked(not bypassed) and no need to be bypassed - logger.Debugf("continue not bypassing") - return false, 0 - } else { - // the socket has been switchbacked(not bypassed) and need to bypass again - logger.Debugf("bypass again fdInNetns(%d) -> fdInHost(%d)", sockStatus.fdInNetns, sockStatus.fdInHost) - sockStatus.state = Bypassed - - h.socketInfo.status[key] = sockStatus - return true, sockStatus.fdInHost - } - } else { - panic(fmt.Errorf("unexpected state :%d", sockStatus.state)) - } + proc = newProcessStatus() + h.processes[pid] = proc + logger.Debug("process is registered") } -} -// handleSysBind handles syscall bind(2). -// If binding port is the target of port-forwarding, -// it creates and configures including bind(2) a socket on host. -// Then, handler replaces container's socket to created one. -func (h *notifHandler) handleSysBind(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "bind", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return + sock, ok := proc.sockets[sockfd] + if ok { + logger.Warn("socket is already registered") + return sock, nil } - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // TODO: get port-fowrad mapping from nerdctl - fwdPort, ok := h.forwardingPorts[int(sa.Port)] - if !ok { - logger.Infof("port=%d is not target of port forwarding.", sa.Port) - return + // If the pid is thread, its process can have corresponding socket + procInfo, ok := h.pidInfos[int(pid)] + if ok && procInfo.pidType == THREAD { + return nil, fmt.Errorf("unexpected procInfo") } - sockfd2, sockfd, err := duplicateSocketOnHost(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) + sockFdHost, err := h.getFdInProcess(int(pid), sockfd) if err != nil { - logger.Errorf("duplicating socket failed: %s", err) - return + return nil, err } - defer syscall.Close(sockfd) - defer syscall.Close(sockfd2) + defer syscall.Close(sockFdHost) - err = h.socketInfo.configureSocket(ctx, sockfd2) + sockDomain, sockType, sockProtocol, err := getSocketArgs(sockFdHost) + sock = newSocketStatus(pid, sockfd, sockDomain, sockType, sockProtocol) if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) - return - } - - var bind_addr syscall.Sockaddr - - switch sa.Family { - case syscall.AF_INET: - var addr [4]byte - for i := 0; i < 4; i++ { - addr[i] = sa.IP[i] - } - bind_addr = &syscall.SockaddrInet4{ - Port: fwdPort.HostPort, - Addr: addr, - } - case syscall.AF_INET6: - var addr [16]byte - for i := 0; i < 16; i++ { - addr[i] = sa.IP[i] - } - bind_addr = &syscall.SockaddrInet6{ - Port: fwdPort.HostPort, - ZoneId: sa.ScopeID, - Addr: addr, + // non-socket fd is not bypassable + sock.state = NotBypassable + logger.Debugf("failed to get socket args err=%q", err) + } else { + if sockDomain != syscall.AF_INET && sockDomain != syscall.AF_INET6 { + // non IP sockets are not handled. + sock.state = NotBypassable + logger.Debugf("socket domain=0x%x", sockDomain) + } else if sockType != syscall.SOCK_STREAM { + // only accepting TCP socket + sock.state = NotBypassable + logger.Debugf("socket type=0x%x", sockType) + } else { + // only newly created socket is allowed. + _, err := syscall.Getpeername(sockFdHost) + if err == nil { + logger.Infof("socket is already connected. socket is created via accept or forked") + sock.state = NotBypassable + } } } - err = syscall.Bind(sockfd2, bind_addr) - if err != nil { - logger.Errorf("bind failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) - return + proc.sockets[sockfd] = sock + if sock.state == NotBypassable { + logger.Debugf("socket is registered (state=%s)", sock.state) + } else { + logger.Infof("socket is registered (state=%s)", sock.state) } - logger.Infof("binding for %d:%d is done", fwdPort.HostPort, fwdPort.ChildPort) - - ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) + return sock, nil } -// handleSysClose handles `close(2)` and delete recorded socket options. -func (h *notifHandler) handleSysClose(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "close", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - logger.Trace("handle") - h.socketInfo.deleteSocket(ctx, logger) -} - -// handleSysConnect handles syscall connect(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. -func (h *notifHandler) handleSysConnect(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "connect", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return - } - - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - - if !cont { - return - } - defer syscall.Close(sockfd2) - - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) - return +func (h *notifHandler) getSocket(pid int, sockfd int) *socketStatus { + proc, ok := h.processes[pid] + if !ok { + return nil } + sock := proc.sockets[sockfd] + return sock } -type msgHdrName struct { - Name uint64 - Namelen uint32 -} - -// handleSysSendto handles syscall sendmsg(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. -// This handles only SOCK_DGRAM sockets. -func (h *notifHandler) handleSysSendmsg(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "sendmsg", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - msghdr := msgHdrName{} - buf, err := readProcMem(ctx.req.Pid, ctx.req.Data.Args[1], 12) - if err != nil { - logger.Errorf("failed readProcMem pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) - } - - reader := bytes.NewReader(buf) - err = binary.Read(reader, binary.LittleEndian, &msghdr) - if err != nil { - logger.Errorf("cannnot cast byte array to Msghdr: %s", err) - } - - // addrlen == 0 means the socket is already connected - if msghdr.Namelen == 0 { - return - } - - sockfd, err := getFdInProcess(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) - if err != nil { - logger.Errorf("failed to get fd: %s", err) - } - sock_domain, sock_type, _, err := getSocketArgs(sockfd) - - if err != nil { - logger.Errorf("failed to get socket args: %v", err) - return - } - - switch sock_domain { - case syscall.AF_INET, syscall.AF_INET6: - default: - logger.Debugf("only supported AF_INET, AF_INET6: %d", sock_domain) - return - } - - if sock_type != syscall.SOCK_DGRAM { - logger.Debug("only SOCK_DGRAM sockets are handled") - return - } - - addrOffset := uint64(msghdr.Name) - sa, err := readSockaddrFromProcess(ctx.req.Pid, addrOffset, uint64(msghdr.Namelen)) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return - } - - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - defer syscall.Close(sockfd2) - - if !cont { - return - } - - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("setsocketoptions failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) +func (h *notifHandler) removeSocket(pid int, sockfd int) { + defer logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}).Debugf("socket is removed") + proc, ok := h.processes[pid] + if !ok { return } + delete(proc.sockets, sockfd) } -// handleSysSendto handles syscall sendto(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. -// This handles only SOCK_DGRAM sockets. -func (h *notifHandler) handleSysSendto(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "sendto", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - // addrlen == 0 is send(2) - if ctx.req.Data.Args[5] == 0 { - return - } - - sockfd, err := getFdInProcess(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) - if err != nil { - logger.Errorf("failed to get fd: %s", err) - } - defer syscall.Close(sockfd) - sock_domain, sock_type, _, err := getSocketArgs(sockfd) - +// handleReq handles seccomp notif requests and configures responses. +func (h *notifHandler) handleReq(ctx *context) { + syscallName, err := ctx.req.Data.Syscall.GetName() if err != nil { - logger.Errorf("failed to get socket args: %v", err) - return - } - - if sock_domain != syscall.AF_INET { - logger.Debugf("only supported AF_INET: %d", sock_domain) + logrus.Errorf("Error decoding syscall %v(): %s", ctx.req.Data.Syscall, err) + // TODO: error handle return } + logrus.Tracef("Received syscall %q, pid %v, arch %q, args %+v", syscallName, ctx.req.Pid, ctx.req.Data.Arch, ctx.req.Data.Args) - if sock_type != syscall.SOCK_DGRAM { - logger.Debug("only SOCK_DGRAM sockets are handled") - return - } + ctx.resp.Flags |= SeccompUserNotifFlagContinue - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[4], ctx.req.Data.Args[5]) + // ensure pid is registered in notifHandler.pidInfos + pidInfo, err := h.getPidFdInfo(int(ctx.req.Pid)) if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) + logrus.Errorf("failed to get pidfd err=%q", err) return } - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) + // threads shares file descriptors in the same process space. + // so use tgid as pid to process socket file descriptors + pid := pidInfo.tgid + if pidInfo.pidType == THREAD { + logrus.Debugf("pid %d is thread. use process's tgid %d as pid", ctx.req.Pid, pid) + } - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - defer syscall.Close(sockfd2) + // cleanup sockets when the process exit. + if syscallName == "_exit" || syscallName == "exit_group" { + if pidInfo, ok := h.pidInfos[int(ctx.req.Pid)]; ok { + syscall.Close(int(pidInfo.pidfd)) + delete(h.pidInfos, int(ctx.req.Pid)) + } + if pidInfo.pidType == THREAD { + logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid, "tgid": pid}).Infof("thread is removed") + } - if !cont { + if pidInfo.pidType == PROCESS { + delete(h.processes, pid) + if memfd, ok := h.memfds[pid]; ok { + syscall.Close(memfd) + delete(h.memfds, pid) + } + logrus.WithFields(logrus.Fields{"pid": pid}).Infof("process is removed") + } return } - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) + sockfd := int(ctx.req.Data.Args[0]) + // remove socket when closed + if syscallName == "close" { + h.removeSocket(pid, sockfd) return } - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) - return + sock := h.getSocket(pid, sockfd) + if sock == nil { + sock, err = h.registerSocket(pid, sockfd, syscallName) + if err != nil { + logrus.Errorf("failed to register socket pid %d sockfd %d: %s", pid, sockfd, err) + return + } } -} -// handleSyssetsockopt handles `setsockopt(2)` and records options. -// Recorded options are used in `handleSysConnect` or `handleSysBind` via `setSocketoptions` to configure created sockets. -func (h *notifHandler) handleSysSetsockopt(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "setsockopt", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - logger.Debugf("handle") - err := h.socketInfo.recordSocketOption(ctx, logger) - if err != nil { - logger.Errorf("recordSocketOption failed: %s", err) - } -} + switch sock.state { + case NotBypassable: + // sometimes close(2) is not called for the fd. + // To handle such condition, re-register fd when connect is called for not bypassable fd. + if syscallName == "connect" { + h.removeSocket(pid, sockfd) + sock, err = h.registerSocket(pid, sockfd, syscallName) + if err != nil { + logrus.Errorf("failed to re-register socket pid %d sockfd %d: %s", pid, sockfd, err) + return + } + } + if sock.state != NotBypassed { + return + } -// handleReq handles seccomp notif requests and configures responses. -func (h *notifHandler) handleReq(ctx *context) { - syscallName, err := ctx.req.Data.Syscall.GetName() - if err != nil { - logrus.Errorf("Error decoding syscall %v(): %s", ctx.req.Data.Syscall, err) - // TODO: error handle + // when sock.state == NotBypassed, continue + case Bypassed: + if syscallName == "getpeername" { + sock.handleSysGetpeername(h, ctx) + } return + default: } - logrus.Tracef("Received syscall %q, pid %v, arch %q, args %+v", syscallName, ctx.req.Pid, ctx.req.Data.Arch, ctx.req.Data.Args) - - ctx.resp.Flags |= SeccompUserNotifFlagContinue switch syscallName { case "bind": - h.handleSysBind(ctx) - case "close": - // handling close(2) may cause performance degradation - h.handleSysClose(ctx) + sock.handleSysBind(pid, h, ctx) case "connect": - h.handleSysConnect(ctx) - case "sendmsg": - h.handleSysSendmsg(ctx) - case "sendto": - h.handleSysSendto(ctx) + sock.handleSysConnect(h, ctx) case "setsockopt": - h.handleSysSetsockopt(ctx) + sock.handleSysSetsockopt(pid, h, ctx) + case "fcntl": + sock.handleSysFcntl(ctx) + case "getpeername": + // already handled default: logrus.Errorf("Unknown syscall %q", syscallName) // TODO: error handle @@ -678,6 +622,8 @@ type ForwardPortMapping struct { type Handler struct { socketPath string + comSocketPath string + tracerAgentLogPath string ignoredSubnets []net.IPNet ignoredSubnetsAutoUpdate bool readyFd int @@ -687,12 +633,14 @@ type Handler struct { } // NewHandler creates new seccomp notif handler -func NewHandler(socketPath string) *Handler { +func NewHandler(socketPath, comSocketPath, tracerAgentLogPath string) *Handler { handler := Handler{ - socketPath: socketPath, - ignoredSubnets: []net.IPNet{}, - forwardingPorts: map[int]ForwardPortMapping{}, - readyFd: -1, + socketPath: socketPath, + comSocketPath: comSocketPath, + tracerAgentLogPath: tracerAgentLogPath, + ignoredSubnets: []net.IPNet{}, + forwardingPorts: map[int]ForwardPortMapping{}, + readyFd: -1, } return &handler @@ -729,13 +677,61 @@ func (h *Handler) SetReadyFd(fd int) error { return nil } +type MultinodeConfig struct { + Enable bool + EtcdAddress string + HostAddress string + etcdClientConfig client.Config + etcdClient client.Client + etcdKeyApi client.KeysAPI +} + +type C2CConnectionHandleConfig struct { + Enable bool + TracerEnable bool +} + type notifHandler struct { fd libseccomp.ScmpFd state *specs.ContainerProcessState nonBypassable *nonbypassable.NonBypassable nonBypassableAutoUpdate bool - forwardingPorts map[int]ForwardPortMapping - socketInfo socketInfo + + // key is child port + forwardingPorts map[int]ForwardPortMapping + + // key is pid + processes map[int]*processStatus + + // key is destination address e.g. "192.168.1.1:1000" + containerInterfaces map[string]containerInterface + c2cConnections *C2CConnectionHandleConfig + multinode *MultinodeConfig + + // cache /proc//mem's fd to reduce latency. key is pid, value is fd + memfds map[int]int + + // cache pidfd to reduce latency. key is pid. + pidInfos map[int]pidInfo +} + +type containerInterface struct { + containerID string + hostPort int + lastCheckedUnix int64 +} + +type pidInfoPidType int + +const ( + PROCESS pidInfoPidType = iota + THREAD +) + +type pidInfo struct { + pidType pidInfoPidType + pidfd pidfd.PidFd + tgid int } func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { @@ -743,10 +739,9 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState fd: libseccomp.ScmpFd(fd), state: state, forwardingPorts: map[int]ForwardPortMapping{}, - socketInfo: socketInfo{ - options: map[string][]socketOption{}, - status: map[string]socketStatus{}, - }, + processes: map[int]*processStatus{}, + memfds: map[int]int{}, + pidInfos: map[int]pidInfo{}, } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate @@ -760,7 +755,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState } // StartHandle starts seccomp notif handler -func (h *Handler) StartHandle() { +func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeConfig *MultinodeConfig) { logrus.Info("Waiting for seccomp file descriptors") l, err := net.Listen("unix", h.socketPath) if err != nil { @@ -777,6 +772,9 @@ func (h *Handler) StartHandle() { syscall.Close(h.readyFd) } + // prepare tracer agent + var tracerAgent *tracer.Tracer = nil + for { conn, err := l.Accept() logrus.Info("accept connection") @@ -799,6 +797,248 @@ func (h *Handler) StartHandle() { logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) + notifHandler.c2cConnections = c2cConfig + notifHandler.multinode = multinodeConfig + if notifHandler.multinode.Enable { + notifHandler.multinode.etcdClientConfig = client.Config{ + Endpoints: []string{notifHandler.multinode.EtcdAddress}, + Transport: client.DefaultTransport, + HeaderTimeoutPerRequest: 2 * time.Second, + } + notifHandler.multinode.etcdClient, err = client.New(notifHandler.multinode.etcdClientConfig) + if err != nil { + logrus.WithError(err).Fatal("failed to create etcd client") + } + notifHandler.multinode.etcdKeyApi = client.NewKeysAPI(notifHandler.multinode.etcdClient) + } + + // not to run multiple tracerAgent. + // TODO: prepare only one tracerAgent in Handler + if c2cConfig.TracerEnable && !multinodeConfig.Enable && tracerAgent == nil { + tracerAgent = tracer.NewTracer(h.tracerAgentLogPath) + err = tracerAgent.StartTracer(gocontext.TODO(), state.Pid) + if err != nil { + logrus.WithError(err).Fatalf("failed to start tracer") + } + fwdPorts := []int{} + for _, v := range notifHandler.forwardingPorts { + fwdPorts = append(fwdPorts, v.ChildPort) + } + err = tracerAgent.RegisterForwardPorts(fwdPorts) + if err != nil { + logrus.WithError(err).Fatalf("failed to register port") + } + logrus.WithField("fwdPorts", fwdPorts).Info("registered ports to tracer agent") + + // check tracer agent is ready + for _, v := range fwdPorts { + dst := fmt.Sprintf("127.0.0.1:%d", v) + addr, err := tracerAgent.ConnectToAddress([]string{dst}) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", dst) + continue + } + if len(addr) != 1 || addr[0] != dst { + logrus.Fatalf("failed to connect to %s", dst) + continue + } + logrus.Debugf("successfully connected to %s", dst) + } + logrus.Infof("tracer is ready") + } else { + logrus.Infof("tracer is disabled") + } + + // TODO: these goroutines shoud be launched only once. + ready := make(chan bool, 10) + if notifHandler.multinode.Enable { + go notifHandler.startBackgroundMultinodeTask(ready) + } else if notifHandler.c2cConnections.Enable { + go notifHandler.startBackgroundC2CConnectionHandleTask(ready, h.comSocketPath, tracerAgent) + } else { + ready <- true + } + + // wait for background tasks becoming ready + <-ready + logrus.Info("background task is ready. start to handle") go notifHandler.handle() } } + +func (h *notifHandler) startBackgroundC2CConnectionHandleTask(ready chan bool, comSocketPath string, tracerAgent *tracer.Tracer) { + initDone := false + logrus.Info("Started bypass4netns background task") + comClient, err := com.NewComClient(comSocketPath) + if err != nil { + logrus.Fatalf("failed to create ComClient: %q", err) + } + err = comClient.Ping(gocontext.TODO()) + if err != nil { + logrus.Fatalf("failed to connect to bypass4netnsd: %q", err) + } + logrus.Infof("Successfully connected to bypass4netnsd") + ifLastUpdateUnix := int64(0) + for { + if ifLastUpdateUnix+10 < time.Now().Unix() { + addrs, err := iproute2.GetAddressesInNetNS(gocontext.TODO(), h.state.Pid) + if err != nil { + logrus.WithError(err).Errorf("failed to get addresses") + return + } + ifs, err := iproute2AddressesToComInterfaces(addrs) + if err != nil { + logrus.WithError(err).Errorf("failed to convert addresses") + return + } + containerIfs := &com.ContainerInterfaces{ + ContainerID: h.state.State.ID, + Interfaces: ifs, + ForwardingPorts: map[int]int{}, + } + for _, v := range h.forwardingPorts { + containerIfs.ForwardingPorts[v.ChildPort] = v.HostPort + } + logrus.Debugf("Interfaces = %v", containerIfs) + _, err = comClient.PostInterface(gocontext.TODO(), containerIfs) + if err != nil { + logrus.WithError(err).Errorf("failed to post interfaces") + } else { + logrus.Infof("successfully posted updated interfaces") + ifLastUpdateUnix = time.Now().Unix() + } + } + containerInterfaces, err := comClient.ListInterfaces(gocontext.TODO()) + if err != nil { + logrus.WithError(err).Warn("failed to list container interfaces") + } + + containerIf := map[string]containerInterface{} + for _, cont := range containerInterfaces { + for contPort, hostPort := range cont.ForwardingPorts { + for _, intf := range cont.Interfaces { + if intf.IsLoopback { + continue + } + for _, addr := range intf.Addresses { + // ignore ipv6 address + if addr.IP.To4() == nil { + continue + } + dstAddr := fmt.Sprintf("%s:%d", addr.IP, contPort) + contIf, ok := h.containerInterfaces[dstAddr] + if ok && contIf.lastCheckedUnix+10 > time.Now().Unix() { + containerIf[dstAddr] = contIf + continue + } + if h.c2cConnections.TracerEnable { + addrRes, err := tracerAgent.ConnectToAddress([]string{dstAddr}) + if err != nil { + logrus.WithError(err).Debugf("failed to connect to %s", dstAddr) + continue + } + if len(addrRes) != 1 || addrRes[0] != dstAddr { + logrus.Debugf("failed to connect to %s", dstAddr) + continue + } + logrus.Debugf("successfully connected to %s", dstAddr) + } + containerIf[dstAddr] = containerInterface{ + containerID: cont.ContainerID, + hostPort: hostPort, + lastCheckedUnix: time.Now().Unix(), + } + logrus.Infof("%s -> 127.0.0.1:%d is registered", dstAddr, hostPort) + } + } + } + } + h.containerInterfaces = containerIf + + // once the interfaces are registered, it is ready to handle connections + if !initDone { + initDone = true + ready <- true + } + + time.Sleep(1 * time.Second) + } +} + +func iproute2AddressesToComInterfaces(addrs iproute2.Addresses) ([]com.Interface, error) { + comIntfs := []com.Interface{} + for _, intf := range addrs { + comIntf := com.Interface{ + Name: intf.IfName, + Addresses: []net.IPNet{}, + IsLoopback: intf.LinkType == "loopback", + } + hwAddr, err := net.ParseMAC(intf.Address) + if err != nil { + return nil, fmt.Errorf("failed to parse HWAddress: %w", err) + } + comIntf.HWAddr = hwAddr + for _, addr := range intf.AddrInfos { + ip, ipNet, err := net.ParseCIDR(fmt.Sprintf("%s/%d", addr.Local, addr.PrefixLen)) + if err != nil { + return nil, fmt.Errorf("failed to parse addr_info: %w", err) + } + ipNet.IP = ip + comIntf.Addresses = append(comIntf.Addresses, *ipNet) + } + + comIntfs = append(comIntfs, comIntf) + } + + return comIntfs, nil +} + +func (h *notifHandler) startBackgroundMultinodeTask(ready chan bool) { + initDone := false + ifLastUpdateUnix := int64(0) + for { + if ifLastUpdateUnix+10 < time.Now().Unix() { + ifs, err := iproute2.GetAddressesInNetNS(gocontext.TODO(), h.state.Pid) + if err != nil { + logrus.WithError(err).Errorf("failed to get addresses") + return + } + for _, intf := range ifs { + // ignore non-ethernet interface + if intf.LinkType != "ether" { + continue + } + for _, addr := range intf.AddrInfos { + // ignore non-IPv4 address + if addr.Family != "inet" { + continue + } + for _, v := range h.forwardingPorts { + containerAddr := fmt.Sprintf("%s:%d", addr.Local, v.ChildPort) + hostAddr := fmt.Sprintf("%s:%d", h.multinode.HostAddress, v.HostPort) + // Remove entries with timeout + // TODO: Remove related entries when exiting. + opts := &client.SetOptions{ + TTL: time.Second * 15, + } + _, err := h.multinode.etcdKeyApi.Set(gocontext.TODO(), ETCD_MULTINODE_PREFIX+containerAddr, hostAddr, opts) + if err != nil { + logrus.WithError(err).Errorf("failed to register %s -> %s", containerAddr, hostAddr) + } else { + logrus.Infof("Registered %s -> %s", containerAddr, hostAddr) + } + } + } + } + ifLastUpdateUnix = time.Now().Unix() + + // once the interfaces are registered, it is ready to handle connections + if !initDone { + initDone = true + ready <- true + } + } + + time.Sleep(1 * time.Second) + } +} diff --git a/pkg/bypass4netns/iproute2/iproute2.go b/pkg/bypass4netns/iproute2/iproute2.go new file mode 100644 index 0000000..9496278 --- /dev/null +++ b/pkg/bypass4netns/iproute2/iproute2.go @@ -0,0 +1,88 @@ +package iproute2 + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "strconv" + + "github.com/rootless-containers/bypass4netns/pkg/util" + "golang.org/x/sys/unix" +) + +type AddrInfo struct { + Family string `json:"family"` + Local string `json:"local"` + PrefixLen int `json:"prefixlen"` + Broadcast string `json:"broadcast"` + Scope string `json:"scope"` + Label string `json:"label"` + ValidLifeTime int `json:"valid_life_time"` + PreferredLifeTime int `json:"preferred_life_time"` +} + +type Interface struct { + IfIndex int `json:"ifindex"` + IfName string `json:"ifname"` + Flags []string `json:"flags"` + Mtu int `json:"mtu"` + Qdisc string `json:"noqueue"` + Operstate string `json:"operstate"` + Group string `json:"group"` + TxQLen int `json:"txqlen"` + LinkType string `json:"link_type"` + Address string `json:"address"` + Broadcast string `json:"broadcast"` + AddrInfos []AddrInfo `json:"addr_info"` +} + +type Addresses = []Interface + +func UnmarshalAddress(jsonAddrs []byte) (Addresses, error) { + var addrs = Addresses{} + + err := json.Unmarshal(jsonAddrs, &addrs) + if err != nil { + return nil, err + } + + return addrs, nil +} + +func GetAddressesInNetNS(ctx context.Context, pid int) (Addresses, error) { + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return nil, err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(pid), + "-F", + "-n", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(pid, selfPid) + if err != nil { + return nil, fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", "ip", "-j", "addr", "show") + cmd := exec.CommandContext(ctx, nsenter, nsenterFlags...) + cmd.SysProcAttr = &unix.SysProcAttr{ + Pdeathsig: unix.SIGTERM, + } + stdout, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to start %v: %w", cmd.Args, err) + } + + addrs, err := UnmarshalAddress(stdout) + if err != nil { + return nil, fmt.Errorf("failed to parse json: %w", err) + } + + return addrs, nil +} diff --git a/pkg/bypass4netns/iproute2/iproute2_test.go b/pkg/bypass4netns/iproute2/iproute2_test.go new file mode 100644 index 0000000..184d992 --- /dev/null +++ b/pkg/bypass4netns/iproute2/iproute2_test.go @@ -0,0 +1,190 @@ +package iproute2 + +import ( + "fmt" + "net" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnmarshalAddress(t *testing.T) { + testJson := ` +[ + { + "ifindex":1, + "ifname":"lo", + "flags":[ + "LOOPBACK", + "UP", + "LOWER_UP" + ], + "mtu":65536, + "qdisc":"noqueue", + "operstate":"UNKNOWN", + "group":"default", + "txqlen":1000, + "link_type":"loopback", + "address":"00:00:00:00:00:00", + "broadcast":"00:00:00:00:00:00", + "addr_info":[ + { + "family":"inet", + "local":"127.0.0.1", + "prefixlen":8, + "scope":"host", + "label":"lo", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + }, + { + "family":"inet6", + "local":"::1", + "prefixlen":128, + "scope":"host", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":2, + "ifname":"enp1s0", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"fq_codel", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"52:54:00:c3:92:b6", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + "local":"192.168.1.155", + "prefixlen":24, + "broadcast":"192.168.1.255", + "scope":"global", + "label":"enp1s0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + }, + { + "family":"inet6", + "local":"fe80::5054:ff:fec3:92b6", + "prefixlen":64, + "scope":"link", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":3, + "ifname":"docker0", + "flags":[ + "NO-CARRIER", + "BROADCAST", + "MULTICAST", + "UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "operstate":"DOWN", + "group":"default", + "link_type":"ether", + "address":"02:42:ab:c8:78:84", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + "local":"172.17.0.1", + "prefixlen":16, + "broadcast":"172.17.255.255", + "scope":"global", + "label":"docker0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":61, + "ifname":"lxdbr0", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"00:16:3e:4d:92:98", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + "local":"192.168.6.1", + "prefixlen":24, + "scope":"global", + "label":"lxdbr0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":71, + "link_index":70, + "ifname":"veth71db11e7", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "master":"lxdbr0", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"da:83:f0:97:c7:14", + "broadcast":"ff:ff:ff:ff:ff:ff", + "link_netnsid":0, + "addr_info":[ + + ] + } +] + ` + + addrs, err := UnmarshalAddress([]byte(testJson)) + assert.Equal(t, nil, err) + assert.Equal(t, 5, len(addrs)) + intf := addrs[1] + assert.Equal(t, "UP", intf.Operstate) + assert.Equal(t, "ether", intf.LinkType) + assert.Equal(t, 2, len(intf.AddrInfos)) + addr := intf.AddrInfos[0] + assert.Equal(t, "inet", addr.Family) + assert.Equal(t, "192.168.1.155", addr.Local) + addrIp, addrCidr, err := net.ParseCIDR(fmt.Sprintf("%s/%d", addr.Local, addr.PrefixLen)) + assert.Equal(t, nil, err) + addrCidr.IP = addrIp + assert.Equal(t, "192.168.1.155/24", addrCidr.String()) + addr2 := intf.AddrInfos[1] + assert.Equal(t, "inet6", addr2.Family) + assert.Equal(t, "fe80::5054:ff:fec3:92b6", addr2.Local) +} diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index ff6eab2..cc0bf61 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -15,6 +15,7 @@ import ( "sync" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent/types" + "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -45,6 +46,33 @@ func (x *NonBypassable) Contains(ip net.IP) bool { return false } +//func (x *NonBypassable) IsInterfaceIPAddress(ip net.IP) bool { +// x.mu.RLock() +// defer x.mu.RUnlock() +// for _, intf := range x.interfaces { +// for _, intfIP := range intf.Addresses { +// if intfIP.IP.Equal(ip) { +// return true +// } +// } +// } +// +// return false +//} +// +//func (x *NonBypassable) GetInterfaces() []com.Interface { +// x.mu.RLock() +// defer x.mu.RUnlock() +// ips := append([]com.Interface{}, x.interfaces...) +// return ips +//} +// +//func (x *NonBypassable) GetLastUpdateUnix() int64 { +// x.mu.RLock() +// defer x.mu.RUnlock() +// return x.lastUpdateUnix +//} + // WatchNS watches the NS associated with the PID and updates the internal dynamic list on receiving SIGHUP. func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { selfExe, err := os.Executable() @@ -61,7 +89,7 @@ func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { "-n", } selfPid := os.Getpid() - ok, err := sameUserNS(pid, selfPid) + ok, err := util.SameUserNS(pid, selfPid) if err != nil { return fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) } @@ -129,17 +157,3 @@ func (x *NonBypassable) watchNS(r io.Reader) { } } } - -func sameUserNS(pidX, pidY int) (bool, error) { - nsX := fmt.Sprintf("/proc/%d/ns/user", pidX) - nsY := fmt.Sprintf("/proc/%d/ns/user", pidY) - nsXResolved, err := os.Readlink(nsX) - if err != nil { - return false, err - } - nsYResolved, err := os.Readlink(nsY) - if err != nil { - return false, err - } - return nsXResolved == nsYResolved, nil -} diff --git a/pkg/bypass4netns/sockaddr.go b/pkg/bypass4netns/sockaddr.go index 2e7e70a..4cc9da7 100644 --- a/pkg/bypass4netns/sockaddr.go +++ b/pkg/bypass4netns/sockaddr.go @@ -16,6 +16,10 @@ type sockaddr struct { ScopeID uint32 // sin6_scope_id } +func (sa *sockaddr) String() string { + return fmt.Sprintf("%s:%d", sa.IP, sa.Port) +} + func newSockaddr(buf []byte) (*sockaddr, error) { sa := &sockaddr{} reader := bytes.NewReader(buf) @@ -34,9 +38,7 @@ func newSockaddr(buf []byte) (*sockaddr, error) { return nil, fmt.Errorf("cannot cast byte array to RawSockaddrInet4: %w", err) } sa.IP = make(net.IP, len(addr4.Addr)) - for i, x := range addr4.Addr { // nolint: gosimple - sa.IP[i] = x - } + copy(sa.IP, addr4.Addr[:]) p := make([]byte, 2) binary.BigEndian.PutUint16(p, addr4.Port) sa.Port = int(endian.Uint16(p)) @@ -49,9 +51,7 @@ func newSockaddr(buf []byte) (*sockaddr, error) { return nil, fmt.Errorf("cannot cast byte array to RawSockaddrInet6: %w", err) } sa.IP = make(net.IP, len(addr6.Addr)) - for i, x := range addr6.Addr { // nolint: gosimple - sa.IP[i] = x - } + copy(sa.IP, addr6.Addr[:]) p := make([]byte, 2) binary.BigEndian.PutUint16(p, addr6.Port) sa.Port = int(endian.Uint16(p)) @@ -62,3 +62,42 @@ func newSockaddr(buf []byte) (*sockaddr, error) { } return sa, nil } + +func (sa *sockaddr) toBytes() ([]byte, error) { + res := bytes.Buffer{} + // TODO: support big endian hosts + endian := binary.LittleEndian + + // ntohs + p := make([]byte, 2) + binary.BigEndian.PutUint16(p, uint16(sa.Port)) + + switch sa.Family { + case syscall.AF_INET: + addr4 := syscall.RawSockaddrInet4{} + addr4.Family = syscall.AF_INET + copy(addr4.Addr[:], sa.IP.To4()[:]) + + addr4.Port = endian.Uint16(p) + err := binary.Write(&res, endian, addr4) + if err != nil { + return nil, err + } + case syscall.AF_INET6: + addr6 := syscall.RawSockaddrInet6{} + addr6.Family = syscall.AF_INET6 + copy(addr6.Addr[:], sa.IP.To16()[:]) + + addr6.Port = endian.Uint16(p) + addr6.Flowinfo = sa.Flowinfo + addr6.Scope_id = sa.ScopeID + err := binary.Write(&res, endian, addr6) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("expected AF_INET or AF_INET6, got %d", sa.Family) + } + + return res.Bytes(), nil +} diff --git a/pkg/bypass4netns/sockaddr_test.go b/pkg/bypass4netns/sockaddr_test.go new file mode 100644 index 0000000..c48b555 --- /dev/null +++ b/pkg/bypass4netns/sockaddr_test.go @@ -0,0 +1,53 @@ +package bypass4netns + +import ( + "net" + "syscall" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSerializeDeserializeSockaddr4(t *testing.T) { + ip := net.ParseIP("192.168.1.100") + port := 12345 + + sa := sockaddr{ + IP: ip, + Port: port, + } + sa.Family = syscall.AF_INET + + saBytes, err := sa.toBytes() + assert.Equal(t, nil, err) + assert.Equal(t, 16, len(saBytes)) + + sa2, err := newSockaddr(saBytes) + assert.Equal(t, nil, err) + assert.Equal(t, ip.String(), sa2.IP.String()) + assert.Equal(t, port, sa2.Port) +} + +func TestSerializeDeserializeSockaddr6(t *testing.T) { + ip := net.ParseIP("2001:0db8::1:0:0:1") + port := 12345 + + sa := sockaddr{ + IP: ip, + Port: port, + Flowinfo: 0x12345678, + ScopeID: 0x9abcdef0, + } + sa.Family = syscall.AF_INET6 + + saBytes, err := sa.toBytes() + assert.Equal(t, nil, err) + assert.Equal(t, 28, len(saBytes)) + + sa2, err := newSockaddr(saBytes) + assert.Equal(t, nil, err) + assert.Equal(t, ip.String(), sa2.IP.String()) + assert.Equal(t, port, sa2.Port) + assert.Equal(t, sa.Flowinfo, uint32(0x12345678)) + assert.Equal(t, sa.ScopeID, uint32(0x9abcdef0)) +} diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 0849cd8..a9cdc55 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -1,11 +1,17 @@ package bypass4netns import ( + gocontext "context" + "encoding/binary" "fmt" + "net" + "strconv" + "strings" "syscall" "unsafe" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) type socketOption struct { @@ -15,62 +21,90 @@ type socketOption struct { optlen uint64 } +// Handle F_SETFL, F_SETFD options +type fcntlOption struct { + cmd uint64 + value uint64 +} + type socketState int const ( + // NotBypassableSocket means that the fd is not socket or not bypassed + NotBypassable socketState = iota + + // NotBypassed means that the socket is not bypassed. + NotBypassed + // Bypassed means that the socket is replaced by one created on the host - Bypassed socketState = iota + Bypassed - // SwitchBacked means that the socket was bypassed but now rereplaced to the socket in netns. - // This state can be hannpend in connect(2), sendto(2) and sendmsg(2) - // when connecting to a host outside of netns and then connecting to a host inside of netns with same fd. - SwitchBacked + // Error happened after bypass. Nothing can be done to recover from this state. + Error ) -type socketStatus struct { - state socketState - fdInNetns int - fdInHost int +func (ss socketState) String() string { + switch ss { + case NotBypassable: + return "NotBypassable" + case NotBypassed: + return "NotBypassed" + case Bypassed: + return "Bypassed" + case Error: + return "Error" + default: + panic(fmt.Sprintf("unexpected enum %d: String() is not implmented", ss)) + } } -type socketInfo struct { - options map[string][]socketOption - status map[string]socketStatus +type processStatus struct { + sockets map[int]*socketStatus } -// configureSocket set recorded socket options. -func (info *socketInfo) configureSocket(ctx *context, sockfd int) error { - key := fmt.Sprintf("%d:%d", ctx.req.Pid, ctx.req.Data.Args[0]) - optValues, ok := info.options[key] - if !ok { - return nil - } - for _, optVal := range optValues { - _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) - if errno != 0 { - return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) - } - logrus.Debugf("configured socket option pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, optVal) +func newProcessStatus() *processStatus { + return &processStatus{ + sockets: map[int]*socketStatus{}, } +} - return nil +type socketStatus struct { + state socketState + pid int + sockfd int + sockDomain int + sockType int + sockProto int + // address for bind or connect + addr *sockaddr + socketOptions []socketOption + fcntlOptions []fcntlOption + + logger *logrus.Entry +} + +func newSocketStatus(pid int, sockfd int, sockDomain, sockType, sockProto int) *socketStatus { + return &socketStatus{ + state: NotBypassed, + pid: pid, + sockfd: sockfd, + sockDomain: sockDomain, + sockType: sockType, + sockProto: sockProto, + socketOptions: []socketOption{}, + fcntlOptions: []fcntlOption{}, + logger: logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}), + } } -// recordSocketOption records socket option. -func (info *socketInfo) recordSocketOption(ctx *context, logger *logrus.Entry) error { - sockfd := ctx.req.Data.Args[0] +func (ss *socketStatus) handleSysSetsockopt(pid int, handler *notifHandler, ctx *context) { + ss.logger.Debug("handle setsockopt") level := ctx.req.Data.Args[1] optname := ctx.req.Data.Args[2] optlen := ctx.req.Data.Args[4] - optval, err := readProcMem(ctx.req.Pid, ctx.req.Data.Args[3], optlen) + optval, err := handler.readProcMem(pid, ctx.req.Data.Args[3], optlen) if err != nil { - return fmt.Errorf("readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) - } - - key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) - _, ok := info.options[key] - if !ok { - info.options[key] = make([]socketOption, 0) + ss.logger.Errorf("setsockopt readProcMem failed pid %v offset 0x%x: %s", pid, ctx.req.Data.Args[1], err) } value := socketOption{ @@ -79,27 +113,317 @@ func (info *socketInfo) recordSocketOption(ctx *context, logger *logrus.Entry) e optval: optval, optlen: optlen, } - info.options[key] = append(info.options[key], value) + ss.socketOptions = append(ss.socketOptions, value) - logger.Debugf("recorded socket option sockfd=%d level=%d optname=%d optval=%v optlen=%d", sockfd, level, optname, optval, optlen) - return nil + ss.logger.Debugf("setsockopt level=%d optname=%d optval=%v optlen=%d was recorded.", level, optname, optval, optlen) } -// deleteSocketOptions delete recorded socket options and status -func (info *socketInfo) deleteSocket(ctx *context, logger *logrus.Entry) { - sockfd := ctx.req.Data.Args[0] - key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) - _, ok := info.options[key] - if ok { - delete(info.options, key) - logger.Debugf("removed socket options") +func (ss *socketStatus) handleSysFcntl(ctx *context) { + ss.logger.Debug("handle fcntl") + fcntlCmd := ctx.req.Data.Args[1] + switch fcntlCmd { + case unix.F_SETFD: // 0x2 + case unix.F_SETFL: // 0x4 + opt := fcntlOption{ + cmd: fcntlCmd, + value: ctx.req.Data.Args[2], + } + ss.fcntlOptions = append(ss.fcntlOptions, opt) + ss.logger.Debugf("fcntl cmd=0x%x value=%d was recorded.", fcntlCmd, opt.value) + case unix.F_GETFL: // 0x3 + // ignore these + default: + ss.logger.Warnf("Unknown fcntl command 0x%x ignored.", fcntlCmd) } +} - status, ok := info.status[key] +func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { + destAddr, err := handler.readSockaddrFromProcess(ss.pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + if err != nil { + ss.logger.Errorf("failed to read sockaddr from process: %q", err) + return + } + ss.addr = destAddr + + var newDestAddr net.IP + switch destAddr.Family { + case syscall.AF_INET: + newDestAddr = net.IPv4zero + newDestAddr = newDestAddr.To4() + newDestAddr[0] = 127 + newDestAddr[3] = 1 + case syscall.AF_INET6: + newDestAddr = net.IPv6loopback + newDestAddr = newDestAddr.To16() + default: + ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) + ss.state = Error + return + } + + // check whether the destination is bypassed or not. + connectToLoopback := false + connectToInterface := false + connectToOtherBypassedContainer := false + fwdPort, ok := handler.forwardingPorts[int(destAddr.Port)] if ok { - delete(info.status, key) - syscall.Close(status.fdInNetns) - syscall.Close(status.fdInHost) - logger.Debugf("removed socket status(fdInNetns=%d fdInHost=%d)", status.fdInNetns, status.fdInHost) + if destAddr.IP.IsLoopback() { + ss.logger.Infof("destination address %v is loopback and bypassed", destAddr) + connectToLoopback = true + } else if contIf, ok := handler.containerInterfaces[destAddr.String()]; ok && contIf.containerID == handler.state.State.ID { + ss.logger.Infof("destination address %v is interface's address and bypassed", destAddr) + connectToInterface = true + } + } + + if handler.multinode.Enable && destAddr.IP.IsPrivate() { + // currently, only private addresses are available in multinode communication. + key := ETCD_MULTINODE_PREFIX + destAddr.String() + res, err := handler.multinode.etcdKeyApi.Get(gocontext.TODO(), ETCD_MULTINODE_PREFIX+destAddr.String(), nil) + if err != nil { + ss.logger.WithError(err).Warnf("destination address %q is not registered", key) + } else { + hostAddrWithPort := res.Node.Value + hostAddrs := strings.Split(hostAddrWithPort, ":") + if len(hostAddrs) != 2 { + ss.logger.Errorf("invalid address format %q", hostAddrWithPort) + ss.state = Error + return + } + hostAddr := hostAddrs[0] + hostPort, err := strconv.Atoi(hostAddrs[1]) + if err != nil { + ss.logger.Errorf("invalid address format %q", hostAddrWithPort) + ss.state = Error + return + } + newDestAddr = net.ParseIP(hostAddr) + fwdPort.HostPort = hostPort + connectToOtherBypassedContainer = true + ss.logger.Infof("destination address %v is container address and bypassed via overlay network", destAddr) + } + } else if handler.c2cConnections.Enable { + contIf, ok := handler.containerInterfaces[destAddr.String()] + if ok { + ss.logger.Infof("destination address %v is container address and bypassed", destAddr) + fwdPort.HostPort = contIf.hostPort + connectToOtherBypassedContainer = true + } + } + + // check whether the destination container socket is bypassed or not. + isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) + + if !connectToLoopback && !connectToInterface && !connectToOtherBypassedContainer && isNotBypassed { + ss.logger.Infof("destination address %v is not bypassed.", destAddr.IP) + ss.state = NotBypassable + return + } + + sockfdOnHost, err := syscall.Socket(ss.sockDomain, ss.sockType, ss.sockProto) + if err != nil { + ss.logger.Errorf("failed to create socket: %q", err) + ss.state = NotBypassable + return + } + defer syscall.Close(sockfdOnHost) + + err = ss.configureSocket(sockfdOnHost) + if err != nil { + ss.logger.Errorf("failed to configure socket: %q", err) + ss.state = NotBypassable + return } + + addfd := seccompNotifAddFd{ + id: ctx.req.ID, + flags: SeccompAddFdFlagSetFd, + srcfd: uint32(sockfdOnHost), + newfd: uint32(ctx.req.Data.Args[0]), + newfdFlags: 0, + } + + err = addfd.ioctlNotifAddFd(ctx.notifFd) + if err != nil { + ss.logger.Errorf("ioctl NotifAddFd failed: %q", err) + ss.state = NotBypassable + return + } + + if connectToLoopback || connectToInterface || connectToOtherBypassedContainer { + p := make([]byte, 2) + binary.BigEndian.PutUint16(p, uint16(fwdPort.HostPort)) + // writing host port at sock_addr's port offset + // TODO: should we return dummy value when getpeername(2) is called? + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+2, p) + if err != nil { + ss.logger.Errorf("failed to rewrite destination port: %q", err) + ss.state = Error + return + } + ss.logger.Infof("destination's port %d is rewritten to host-side port %d", ss.addr.Port, fwdPort.HostPort) + } + + if connectToInterface || connectToOtherBypassedContainer { + // writing host's loopback address to connect to bypassed socket at sock_addr's address offset + // TODO: should we return dummy value when getpeername(2) is called? + switch destAddr.Family { + case syscall.AF_INET: + newDestAddr = newDestAddr.To4() + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, newDestAddr[0:4]) + case syscall.AF_INET6: + newDestAddr = newDestAddr.To16() + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, newDestAddr[0:16]) + default: + ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) + ss.state = Error + return + } + if err != nil { + ss.logger.Errorf("failed to rewrite destination address: %q", err) + ss.state = Error + return + } + + ss.logger.Infof("destination address %s is rewritten to %s", destAddr.IP, newDestAddr) + } + + ss.state = Bypassed + ss.logger.Infof("bypassed connect socket destAddr=%s", ss.addr) +} + +func (ss *socketStatus) handleSysBind(pid int, handler *notifHandler, ctx *context) { + sa, err := handler.readSockaddrFromProcess(pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + if err != nil { + ss.logger.Errorf("failed to read sockaddr from process: %q", err) + ss.state = NotBypassable + return + } + ss.addr = sa + + ss.logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) + + // TODO: get port-fowrad mapping from nerdctl + fwdPort, ok := handler.forwardingPorts[int(sa.Port)] + if !ok { + ss.logger.Infof("port=%d is not target of port forwarding.", sa.Port) + ss.state = NotBypassable + return + } + + sockfdOnHost, err := syscall.Socket(ss.sockDomain, ss.sockType, ss.sockProto) + if err != nil { + ss.logger.Errorf("failed to create socket: %q", err) + ss.state = NotBypassable + return + } + defer syscall.Close(sockfdOnHost) + + err = ss.configureSocket(sockfdOnHost) + if err != nil { + ss.logger.Errorf("failed to configure socket: %q", err) + ss.state = NotBypassable + return + } + + var bind_addr syscall.Sockaddr + + switch sa.Family { + case syscall.AF_INET: + var addr [4]byte + for i := 0; i < 4; i++ { + addr[i] = sa.IP[i] + } + bind_addr = &syscall.SockaddrInet4{ + Port: fwdPort.HostPort, + Addr: addr, + } + case syscall.AF_INET6: + var addr [16]byte + for i := 0; i < 16; i++ { + addr[i] = sa.IP[i] + } + bind_addr = &syscall.SockaddrInet6{ + Port: fwdPort.HostPort, + ZoneId: sa.ScopeID, + Addr: addr, + } + } + + err = syscall.Bind(sockfdOnHost, bind_addr) + if err != nil { + ss.logger.Errorf("bind failed: %s", err) + ss.state = NotBypassable + return + } + + addfd := seccompNotifAddFd{ + id: ctx.req.ID, + flags: SeccompAddFdFlagSetFd, + srcfd: uint32(sockfdOnHost), + newfd: uint32(ctx.req.Data.Args[0]), + newfdFlags: 0, + } + + err = addfd.ioctlNotifAddFd(ctx.notifFd) + if err != nil { + ss.logger.Errorf("ioctl NotifAddFd failed: %s", err) + ss.state = NotBypassable + return + } + + ss.state = Bypassed + ss.logger.Infof("bypassed bind socket for %d:%d is done", fwdPort.HostPort, fwdPort.ChildPort) + + ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) +} + +func (ss *socketStatus) handleSysGetpeername(handler *notifHandler, ctx *context) { + if ss.addr == nil { + return + } + + buf, err := ss.addr.toBytes() + if err != nil { + ss.logger.WithError(err).Errorf("failed to serialize address %s", ss.addr) + return + } + + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1], buf) + if err != nil { + ss.logger.WithError(err).Errorf("failed to write address %s", ss.addr) + return + } + + bufLen := make([]byte, 4) + binary.LittleEndian.PutUint32(bufLen, uint32(len(buf))) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[2], bufLen) + if err != nil { + ss.logger.WithError(err).Errorf("failed to write address length %d", len(buf)) + return + } + + ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) + + ss.logger.Infof("rewrite getpeername() address to %s", ss.addr) +} + +func (ss *socketStatus) configureSocket(sockfd int) error { + for _, optVal := range ss.socketOptions { + _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) + if errno != 0 { + return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) + } + ss.logger.Debugf("configured socket option val=%v", optVal) + } + + for _, fcntlVal := range ss.fcntlOptions { + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(sockfd), uintptr(fcntlVal.cmd), uintptr(fcntlVal.value)) + if errno != 0 { + return fmt.Errorf("fnctl failed(%v): %s", fcntlVal, errno) + } + ss.logger.Debugf("configured socket fcntl val=%v", fcntlVal) + } + + return nil } diff --git a/pkg/bypass4netns/tracer/agent.go b/pkg/bypass4netns/tracer/agent.go new file mode 100644 index 0000000..f36c146 --- /dev/null +++ b/pkg/bypass4netns/tracer/agent.go @@ -0,0 +1,112 @@ +package tracer + +import ( + "encoding/json" + "fmt" + "net" + "os" + "time" + + "github.com/sirupsen/logrus" +) + +type TracerCommand struct { + Cmd TracerCmd `json:"tracerCmd"` + ForwardingPorts []int `json:"forwardingPorts,omitempty"` + DestinationAddress []string `json:"destinationAddress,omitempty"` +} + +type TracerCmd int + +const ( + Ok TracerCmd = iota + RegisterForwardPorts + ConnectToAddress +) + +func Main() error { + r := os.Stdin + w := os.Stdout + dec := json.NewDecoder(r) + for { + var cmd TracerCommand + err := dec.Decode(&cmd) + if err != nil { + logrus.WithError(err).Errorf("failed to decode") + break + } + logrus.Infof("decoded = %v", cmd) + switch cmd.Cmd { + case RegisterForwardPorts: + for _, p := range cmd.ForwardingPorts { + readyChan := make(chan bool) + go func(port int, c chan bool) { + err := listenLoop(port, c) + if err != nil { + logrus.WithError(err).Errorf("failed to listen on port %d", port) + } + }(p, readyChan) + <-readyChan + } + cmd = TracerCommand{ + Cmd: Ok, + } + case ConnectToAddress: + addrs := []string{} + for i := range cmd.DestinationAddress { + addr := cmd.DestinationAddress[i] + err = tryToConnect(addr) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", addr) + continue + } + addrs = append(addrs, addr) + } + cmd = TracerCommand{ + Cmd: Ok, + DestinationAddress: addrs, + } + } + + m, err := json.Marshal(cmd) + if err != nil { + logrus.WithError(err).Errorf("failed to encode") + } + _, err = w.Write(m) + if err != nil { + logrus.WithError(err).Errorf("failed to write") + } + } + + logrus.Infof("Exit.") + return nil +} + +func listenLoop(port int, readyChan chan bool) error { + l, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + return err + } + defer l.Close() + + readyChan <- true + logrus.Infof("started to listen on port %d", port) + for { + conn, err := l.Accept() + if err != nil { + return err + } + conn.Close() + } +} + +func tryToConnect(addr string) error { + conn, err := net.DialTimeout("tcp", addr, 10*time.Millisecond) + if err != nil { + return err + } + defer conn.Close() + logrus.Infof("successfully connected to %s", addr) + + return nil +} diff --git a/pkg/bypass4netns/tracer/tracer.go b/pkg/bypass4netns/tracer/tracer.go new file mode 100644 index 0000000..97d1608 --- /dev/null +++ b/pkg/bypass4netns/tracer/tracer.go @@ -0,0 +1,137 @@ +package tracer + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "sync" + + "github.com/rootless-containers/bypass4netns/pkg/util" + "golang.org/x/sys/unix" +) + +type Tracer struct { + logPath string + tracerCmd *exec.Cmd + reader io.Reader + writer io.Writer + + lock sync.Mutex +} + +func NewTracer(logPath string) *Tracer { + return &Tracer{ + logPath: logPath, + lock: sync.Mutex{}, + } +} + +// StartTracer starts tracer in NS associated with the PID. +func (x *Tracer) StartTracer(ctx context.Context, pid int) error { + selfExe, err := os.Executable() + if err != nil { + return err + } + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(pid), + "-F", + "-n", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(pid, selfPid) + if err != nil { + return fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", selfExe, "--tracer-agent", "--log-file", x.logPath) + x.tracerCmd = exec.CommandContext(ctx, nsenter, nsenterFlags...) + x.tracerCmd.SysProcAttr = &unix.SysProcAttr{ + Pdeathsig: unix.SIGTERM, + } + x.tracerCmd.Stderr = os.Stderr + x.reader, x.tracerCmd.Stdout = io.Pipe() + x.tracerCmd.Stdin, x.writer = io.Pipe() + if err := x.tracerCmd.Start(); err != nil { + return fmt.Errorf("failed to start %v: %w", x.tracerCmd.Args, err) + } + return nil +} + +func (x *Tracer) RegisterForwardPorts(ports []int) error { + cmd := TracerCommand{ + Cmd: RegisterForwardPorts, + ForwardingPorts: ports, + } + + m, err := json.Marshal(cmd) + if err != nil { + return err + } + + writeSize, err := x.writer.Write(m) + if err != nil { + return err + } + if writeSize != len(m) { + return fmt.Errorf("unexpected written size expected=%d actual=%d", len(m), writeSize) + } + + dec := json.NewDecoder(x.reader) + var resp TracerCommand + err = dec.Decode(&resp) + if err != nil { + return fmt.Errorf("invalid response: %q", err) + } + + if resp.Cmd != Ok { + return fmt.Errorf("unexpected response: %d", resp.Cmd) + } + + return nil +} + +func (x *Tracer) ConnectToAddress(addrs []string) ([]string, error) { + x.lock.Lock() + defer x.lock.Unlock() + + cmd := TracerCommand{ + Cmd: ConnectToAddress, + DestinationAddress: addrs, + } + + m, err := json.Marshal(cmd) + if err != nil { + return nil, err + } + + writeSize, err := x.writer.Write(m) + if err != nil { + return nil, err + } + if writeSize != len(m) { + return nil, fmt.Errorf("unexpected written size expected=%d actual=%d", len(m), writeSize) + } + + dec := json.NewDecoder(x.reader) + var resp TracerCommand + err = dec.Decode(&resp) + if err != nil { + return nil, fmt.Errorf("invalid response: %q", err) + } + + if resp.Cmd != Ok { + return nil, fmt.Errorf("unexpected response: %d", resp.Cmd) + } + + return resp.DestinationAddress, nil +} diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index a6501cd..bf17c77 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -10,21 +10,36 @@ import ( "time" "github.com/rootless-containers/bypass4netns/pkg/api" + "github.com/rootless-containers/bypass4netns/pkg/api/com" + "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) type Driver struct { BypassExecutablePath string + ComSocketPath string bypass map[string]api.BypassStatus lock sync.RWMutex + containerInterfaces map[string]com.ContainerInterfaces + interfacesLock sync.RWMutex + HandleC2CEnable bool + TracerEnable bool + MultinodeEnable bool + MultinodeEtcdAddress string + MultinodeHostAddress string } -func NewDriver(execPath string) *Driver { +func NewDriver(execPath string, comSocketPath string) *Driver { return &Driver{ BypassExecutablePath: execPath, + ComSocketPath: comSocketPath, bypass: map[string]api.BypassStatus{}, lock: sync.RWMutex{}, + containerInterfaces: map[string]com.ContainerInterfaces{}, + interfacesLock: sync.RWMutex{}, + TracerEnable: false, + MultinodeEnable: false, } } @@ -41,10 +56,14 @@ func (d *Driver) ListBypass() []api.BypassStatus { } func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { - logger := logrus.WithFields(logrus.Fields{"ID": shrinkID(spec.ID)}) + logger := logrus.WithFields(logrus.Fields{"ID": util.ShrinkID(spec.ID)}) logger.Info("Starting bypass") b4nnArgs := []string{} + if logger.Logger.GetLevel() == logrus.DebugLevel { + b4nnArgs = append(b4nnArgs, "--debug") + } + if spec.SocketPath != "" { socketOption := fmt.Sprintf("--socket=%s", spec.SocketPath) b4nnArgs = append(b4nnArgs, socketOption) @@ -68,6 +87,20 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { b4nnArgs = append(b4nnArgs, fmt.Sprintf("--ignore=%s", subnet)) } + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--com-socket=%s", d.ComSocketPath)) + if d.HandleC2CEnable { + b4nnArgs = append(b4nnArgs, "--handle-c2c-connections") + } + if d.TracerEnable { + b4nnArgs = append(b4nnArgs, "--tracer=true") + } + + if d.MultinodeEnable { + b4nnArgs = append(b4nnArgs, "--multinode=true") + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--multinode-etcd-address=%s", d.MultinodeEtcdAddress)) + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--multinode-host-address=%s", d.MultinodeHostAddress)) + } + // prepare pipe for ready notification readyR, readyW, err := os.Pipe() if err != nil { @@ -109,7 +142,7 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { } func (d *Driver) StopBypass(id string) error { - logger := logrus.WithFields(logrus.Fields{"ID": shrinkID(id)}) + logger := logrus.WithFields(logrus.Fields{"ID": util.ShrinkID(id)}) logger.Infof("Stopping bypass") d.lock.Lock() defer d.lock.Unlock() @@ -146,9 +179,51 @@ func (d *Driver) StopBypass(id string) error { delete(d.bypass, id) logger.Info("Stopped bypass") + // remove the container's interfaces + d.DeleteInterface(id) + return nil } +func (d *Driver) ListInterfaces() map[string]com.ContainerInterfaces { + d.interfacesLock.RLock() + defer d.interfacesLock.RUnlock() + + ifs := map[string]com.ContainerInterfaces{} + // copy map + for k := range d.containerInterfaces { + ifs[k] = d.containerInterfaces[k] + } + + return ifs +} + +func (d *Driver) GetInterface(id string) *com.ContainerInterfaces { + d.interfacesLock.RLock() + defer d.interfacesLock.RUnlock() + + ifs, ok := d.containerInterfaces[id] + if !ok { + return nil + } + + return &ifs +} + +func (d *Driver) PostInterface(id string, containerIfs *com.ContainerInterfaces) { + d.interfacesLock.Lock() + defer d.interfacesLock.Unlock() + + d.containerInterfaces[id] = *containerIfs +} + +func (d *Driver) DeleteInterface(id string) { + d.interfacesLock.Lock() + defer d.interfacesLock.Unlock() + + delete(d.containerInterfaces, id) +} + // waitForReady is from libpod // https://github.com/containers/libpod/blob/e6b843312b93ddaf99d0ef94a7e60ff66bc0eac8/libpod/networking_linux.go#L272-L308 func waitForReadyFD(cmdPid int, r *os.File) error { @@ -183,15 +258,3 @@ func waitForReadyFD(cmdPid int, r *os.File) error { } return nil } - -// shrinkID shrinks id to short(12 chars) id -// 6d9bcda7cebd551ddc9e3173d2139386e21b56b241f8459c950ef58e036f6bd8 -// to -// 6d9bcda7cebd -func shrinkID(id string) string { - if len(id) < 12 { - return id - } - - return id[0:12] -} diff --git a/pkg/oci/oci.go b/pkg/oci/oci.go index 495d5b6..9ea3b8c 100644 --- a/pkg/oci/oci.go +++ b/pkg/oci/oci.go @@ -11,7 +11,7 @@ const ( SocketName = "bypass4netns.sock" ) -var SyscallsToBeNotified = []string{"bind", "close", "connect", "sendmsg", "sendto", "setsockopt"} +var SyscallsToBeNotified = []string{"bind", "close", "connect", "setsockopt", "fcntl", "_exit", "exit_group", "getpeername"} func GetDefaultSeccompProfile(listenerPath string) *specs.LinuxSeccomp { tmpl := specs.LinuxSeccomp{ diff --git a/pkg/util/util.go b/pkg/util/util.go new file mode 100644 index 0000000..2aaf4af --- /dev/null +++ b/pkg/util/util.go @@ -0,0 +1,87 @@ +package util + +import ( + "fmt" + "net" + "os" + "syscall" +) + +// shrinkID shrinks id to short(12 chars) id +// 6d9bcda7cebd551ddc9e3173d2139386e21b56b241f8459c950ef58e036f6bd8 +// to +// 6d9bcda7cebd +func ShrinkID(id string) string { + if len(id) < 12 { + return id + } + + return id[0:12] +} + +func SameUserNS(pidX, pidY int) (bool, error) { + nsX := fmt.Sprintf("/proc/%d/ns/user", pidX) + nsY := fmt.Sprintf("/proc/%d/ns/user", pidY) + nsXResolved, err := os.Readlink(nsX) + if err != nil { + return false, err + } + nsYResolved, err := os.Readlink(nsY) + if err != nil { + return false, err + } + return nsXResolved == nsYResolved, nil +} + +// copied from https://github.com/pfnet-research/meta-fuse-csi-plugin/blob/437dbbbbf16e5b02f9a508e3403d044b0a9dff89/pkg/util/fdchannel.go#L29 +// which is licensed under apache 2.0 +func SendMsg(via net.Conn, fd int, msg []byte) error { + conn, ok := via.(*net.UnixConn) + if !ok { + return fmt.Errorf("failed to cast via to *net.UnixConn") + } + connf, err := conn.File() + if err != nil { + return err + } + socket := int(connf.Fd()) + defer connf.Close() + + rights := syscall.UnixRights(fd) + + return syscall.Sendmsg(socket, msg, rights, nil, 0) +} + +func RecvMsg(via net.Conn) (int, []byte, error) { + conn, ok := via.(*net.UnixConn) + if !ok { + return 0, nil, fmt.Errorf("failed to cast via to *net.UnixConn") + } + connf, err := conn.File() + if err != nil { + return 0, nil, err + } + socket := int(connf.Fd()) + defer connf.Close() + + buf := make([]byte, syscall.CmsgSpace(4)) + b := make([]byte, 500) + //nolint:dogsled + n, _, _, _, err := syscall.Recvmsg(socket, b, buf, 0) + if err != nil { + return 0, nil, err + } + + var msgs []syscall.SocketControlMessage + msgs, err = syscall.ParseSocketControlMessage(buf) + if err != nil { + return 0, nil, err + } + + fds, err := syscall.ParseUnixRights(&msgs[0]) + if err != nil { + return 0, nil, err + } + + return fds[0], b[:n], err +} diff --git a/test/Dockerfile b/test/Dockerfile index 1922566..b9a4029 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/docker/library/alpine:3.15 +FROM public.ecr.aws/docker/library/alpine:3.16 RUN apk add python3 diff --git a/test/DockerfileHttpServer b/test/DockerfileHttpServer new file mode 100644 index 0000000..ffae7c1 --- /dev/null +++ b/test/DockerfileHttpServer @@ -0,0 +1,13 @@ +FROM golang:1.21.3 as bench-builder + +COPY httpserver.go . +# static link +RUN CGO_ENABLED=0 go build -o /httpserver httpserver.go + +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +COPY --from=bench-builder /httpserver /httpserver + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/test/enter.sh b/test/enter.sh new file mode 100755 index 0000000..eebe5ad --- /dev/null +++ b/test/enter.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -eux -o pipefail + +PID=$(nerdctl inspect $1 | jq '.[0].State.Pid') +nsenter -t $PID -F -U --preserve-credentials -n diff --git a/test/export_lxc_image.sh b/test/export_lxc_image.sh new file mode 100755 index 0000000..0467b83 --- /dev/null +++ b/test/export_lxc_image.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eux -o pipefail + +IMAGE_NAME=$1 + +sudo lxc snapshot $IMAGE_NAME snp0 +sudo lxc publish $IMAGE_NAME/snp0 --alias $IMAGE_NAME-export --compression zstd +sudo lxc image export $IMAGE_NAME-export /tmp/$IMAGE_NAME-image diff --git a/test/httpserver.go b/test/httpserver.go new file mode 100644 index 0000000..6957279 --- /dev/null +++ b/test/httpserver.go @@ -0,0 +1,66 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "log" + "net/http" +) + +var ( + url = flag.String("url", "http://localhost/blk-1m", "") + mode = flag.String("mode", "server", "") +) + +func main() { + flag.Parse() + + // disable connection pool + http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = -1 + + if *mode == "server" { + fmt.Println("starting server") + server() + } else if *mode == "client" { + err := client(*url) + if err != nil { + log.Fatal(err) + } + } +} + +func server() { + http.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "pong") + }) + + log.Fatal(http.ListenAndServe(":8080", nil)) +} + +func client(url string) error { + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("failed Do err=%q", err) + } + + if resp.StatusCode != 200 { + return fmt.Errorf("unexpected status code %d", resp.StatusCode) + } else { + var buffer bytes.Buffer + + _, err = io.Copy(&buffer, resp.Body) + if err != nil { + return fmt.Errorf("failed Copy() err=%q", err) + } + + fmt.Printf("resp=%s\n", buffer.String()) + } + err = resp.Body.Close() + if err != nil { + return fmt.Errorf("failed Close() err=%q", err) + } + + return nil +} diff --git a/test/init_test.sh b/test/init_test.sh new file mode 100755 index 0000000..ed4e952 --- /dev/null +++ b/test/init_test.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +TEST_USER=ubuntu +if [ -v GITHUB_WORKSPACE ]; then + TEST_USER=runner +fi +set -eu -o pipefail + + +if [ "$(whoami)" != "$TEST_USER" ]; then + su $TEST_USER -c $0 + exit 0 +fi + +GO_VERSION="1.21.4" +NERDCTL_VERSION="1.7.0" + +echo "===== Prepare =====" +( + set -x + + # for lxc + if [ -d /host ]; then + sudo cp -r /host ~/bypass4netns + fi + + # for github actions runner + if [ $TEST_USER == "runner" ]; then + cd ../ + cp -r bypass4netns ~/bypass4netns + fi + + sudo chown -R $TEST_USER:$TEST_USER ~/bypass4netns + + sudo apt-get update + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd jq tcpdump ethtool python3-pip + pip3 install matplotlib numpy + sudo systemctl stop etcd + sudo systemctl disable etcd + + systemctl --user start dbus + + curl -fsSL https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | sudo tar Cxz /usr/local + echo "export PATH=$PATH:/usr/local/go/bin" >> ~/.profile + source ~/.profile + + curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxz /usr/local + containerd-rootless-setuptool.sh install + containerd-rootless-setuptool.sh install-buildkit + + # build nerdctl with bypass4netns + curl -fsSL https://github.com/containerd/nerdctl/archive/refs/tags/v${NERDCTL_VERSION}.tar.gz | tar Cxz ~/ + cd ~/nerdctl-${NERDCTL_VERSION} + echo "replace github.com/rootless-containers/bypass4netns => /home/$TEST_USER/bypass4netns" >> go.mod + make + sudo rm -f /usr/local/bin/nerdctl + sudo cp _output/nerdctl /usr/local/bin/nerdctl + nerdctl info + + cd ~/bypass4netns + make + sudo rm -f /usr/local/bin/bypass4netns* + sudo make install + + # also enable rootful containerd for rootful container testing + sudo systemctl enable --now containerd + sudo systemctl enable --now buildkit +) diff --git a/test/launch_test_lxc.sh b/test/launch_test_lxc.sh new file mode 100755 index 0000000..6710634 --- /dev/null +++ b/test/launch_test_lxc.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -ux -o pipefail +IMAGE=${1:-"images:ubuntu/22.04"} + +cd $(dirname $0) + +# lxd init --auto --storage-backend=btrfs +sudo lxc launch -c security.privileged=true -c security.nesting=true $IMAGE test +sudo lxc config device add test share disk source=$(cd ../; pwd) path=/host +sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" +# let user services running +# this sometimes fails, retry until success +RES=1 +while [ $RES -ne 0 ] +do + sleep 1 + sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sudo loginctl enable-linger" + RES=$? +done diff --git a/test/lxd.yaml b/test/lxd.yaml new file mode 100644 index 0000000..26fc8b0 --- /dev/null +++ b/test/lxd.yaml @@ -0,0 +1,38 @@ +config: {} +networks: +- config: + ipv4.address: 192.168.6.1/24 + ipv4.nat: "true" + ipv6.address: none + description: "" + name: lxdbr0 + type: bridge + project: default +storage_pools: +- config: + size: 30GiB + source: /var/snap/lxd/common/lxd/disks/default.img + description: "" + name: default + driver: btrfs +profiles: +- config: {} + description: Default LXD profile + devices: + eth0: + name: eth0 + network: lxdbr0 + type: nic + root: + path: / + pool: default + type: disk + name: default +projects: +- config: + features.images: "true" + features.networks: "true" + features.profiles: "true" + features.storage.volumes: "true" + description: Default LXD project + name: default \ No newline at end of file diff --git a/test/multinode.sh b/test/multinode.sh new file mode 100755 index 0000000..dd95eb0 --- /dev/null +++ b/test/multinode.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +cd $(dirname $0) +. ./util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f vxlan +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +set -eux -o pipefail + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name vxlan -d $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh vxlan $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test" exec_lxc systemd-run --user --unit run-test-iperf3 nerdctl exec vxlan iperf3 -s + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name vxlan -d $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh vxlan $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test2" exec_lxc nerdctl exec vxlan iperf3 -c $TEST1_VXLAN_ADDR + + NAME="test" exec_lxc nerdctl rm -f vxlan + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f vxlan +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name vxlan $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test" exec_lxc systemd-run --user --unit run-test-iperf3 nerdctl exec vxlan iperf3 -s + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec vxlan hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name vxlan $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test2" exec_lxc nerdctl exec vxlan iperf3 -c $SERVER_IP + + NAME="test" exec_lxc nerdctl rm -f vxlan + NAME="test2" exec_lxc nerdctl rm -f vxlan +) diff --git a/test/run_test.sh b/test/run_test.sh new file mode 100755 index 0000000..304af4d --- /dev/null +++ b/test/run_test.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +set -eu -o pipefail + +source ~/.profile + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" +nerdctl pull --quiet "${ALPINE_IMAGE}" + +SCRIPT_DIR=$(cd $(dirname $0); pwd) +set +u +if [ "$1" == "SYNC" ]; then + echo "updating source code" + rm -rf ~/bypass4netns + sudo cp -r /host ~/bypass4netns + sudo chown -R ubuntu:ubuntu ~/bypass4netns + cd ~/bypass4netns + echo "source code is updated" + exec $0 "FORK" + exit 0 +fi +cd ~/bypass4netns +rm -f bypass4netns bypass4netnsd +make +sudo make install +set -u +cd $SCRIPT_DIR + +set +e +systemctl --user stop run-iperf3 +systemctl --user reset-failed +sleep 1 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +set -e + +systemd-run --user --unit run-iperf3 iperf3 -s +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) +~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json + +sudo journalctl --rotate +sudo journalctl --vacuum-time=1s + +echo "===== rootful mode ====" +( + set +e + sudo nerdctl rm -f test + set -ex + + sudo nerdctl run -d --name test $ALPINE_IMAGE sleep infinity + sudo nerdctl exec test apk add --no-cache iperf3 + sudo nerdctl exec test iperf3 -c $HOST_IP -t 1 --connect-timeout 1000 # it must success to connect. + + sudo nerdctl rm -f test +) + +echo "===== static linked binary test =====" +( + set +e + systemctl --user stop run-bypass4netns-static + nerdctl rm -f test1 + nerdctl rm -f test2 + systemctl --user reset-failed + set -ex + + IMAGE_NAME="b4ns:static" + nerdctl build -f ./DockerfileHttpServer -t $IMAGE_NAME . + + systemd-run --user --unit run-bypass4netns-static bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" + sleep 1 + nerdctl run -d -p 8081:8080 --name test1 $IMAGE_NAME /httpserver -mode server + nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test2 $IMAGE_NAME sleep infinity + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + + COUNT=$(journalctl --user -u run-bypass4netns-static.service | grep 'bypassed connect socket' | wc -l) + if [ $COUNT != 3 ]; then + echo "static linked binary bypassing not working correctly." + exit 1 + fi + + nerdctl rm -f test1 + nerdctl rm -f test2 + systemctl --user stop run-bypass4netns-static +) + +echo "===== '--ignore' option test =====" +( + set +e + systemctl --user stop run-bypass4netns + nerdctl rm -f test + set -ex + + systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug + nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $HOST_IP -t 1 + # TODO: this check is dirty. we want better method to check the connect(2) is ignored. + journalctl --user -u run-bypass4netns.service | grep "is not bypassed" + nerdctl rm -f test + systemctl --user stop run-bypass4netns.service +) + +echo "===== connect(2) test =====" +( + systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 + set -x + cd $SCRIPT_DIR + /bin/bash test_syscalls.sh /tmp/seccomp.json $HOST_IP + systemctl --user stop run-bypass4netns.service +) + +echo "===== Test bypass4netnsd =====" +( + set -x + source ~/.profile + ./test_b4nnd.sh +) + +echo "===== tracer test (disabled) =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --handle-c2c-connections=true + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. + + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd +) + +echo "===== tracer test (enabled) =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --handle-c2c-connections=true --tracer=true --debug + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + set +e + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must not success to connect. + if [ $? -eq 0 ]; then + echo "tracer seems not working" + exit 1 + fi + set -e + + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd +) + + +echo "===== multinode test (single node) ====" +( + set +e + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP --debug + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. + + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/test/seccomp.json.sh b/test/seccomp.json.sh index efa7f85..df6596d 100755 --- a/test/seccomp.json.sh +++ b/test/seccomp.json.sh @@ -22,9 +22,11 @@ cat < /dev/null & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 - -# test_connect udp -python3 test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 - -# check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test connect over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi -echo "test_connect done." - -echo "test_sendto starting..." -# test_sendto tcp -python3 test_sendto.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 - -# test_sendto udp -python3 test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 - -# check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test sendto over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi -echo "test_sendto done." - -echo "test_sendmsg starting..." -# test_sendmsg tcp -python3 test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 - -# test_sendmsg udp -python3 test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 - -# check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test sendto over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi -echo "test_sendmsg done." - -nerdctl rm -f $TEST_CONTAINER_2 -nerdctl rm -f $TEST_CONTAINER_1 -rm /tmp/test_host /tmp/test_test2 diff --git a/test/test_syscalls.sh b/test/test_syscalls.sh new file mode 100755 index 0000000..bc24608 --- /dev/null +++ b/test/test_syscalls.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +set -eu -o pipefail + +SECCOMP_CONFIG_PATH=$1 +HOST_IP=$2 +TEST_CONTAINER_1=test_1 +TEST_CONTAINER_2=test_2 + +ALPINE_IMAGE="alpine_test:connect" +nerdctl image build --file Dockerfile -t $ALPINE_IMAGE --no-cache . +nerdctl run --security-opt seccomp=$SECCOMP_CONFIG_PATH -d --name $TEST_CONTAINER_1 "${ALPINE_IMAGE}" sleep infinity +nerdctl run --security-opt seccomp=$SECCOMP_CONFIG_PATH -d --name $TEST_CONTAINER_2 "${ALPINE_IMAGE}" sleep infinity + +NETNS_IP=`nerdctl exec $TEST_CONTAINER_2 hostname -i` +echo $NETNS_IP + +echo "test_connect starting..." +# test_connect tcp +python3 test_connect.py -s -p 8888 --count 2 &> /dev/null & +nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 --count 2 &> /dev/null & +nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 + +## NOTICE ## +# currently, bypass4netns supports only TCP. Tests for udp connections are disabled. +# test_connect udp +#python3 test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 + +# check server is not timedout +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test connect over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi +echo "test_connect done." + +#echo "test_sendto starting..." +## test_sendto tcp +#python3 test_sendto.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 +# +## test_sendto udp +#python3 test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 +# +## check server is not timedout +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test sendto over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi +#echo "test_sendto done." +# +#echo "test_sendmsg starting..." +## test_sendmsg tcp +#python3 test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 +# +## test_sendmsg udp +#python3 test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 +# +## check server is not timedout +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test sendto over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi +#echo "test_sendmsg done." + +nerdctl rm -f $TEST_CONTAINER_2 +nerdctl rm -f $TEST_CONTAINER_1 +# rm /tmp/test_host /tmp/test_test2 diff --git a/test/util.sh b/test/util.sh new file mode 100644 index 0000000..051c0cb --- /dev/null +++ b/test/util.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -eu -o pipefail + +function exec_netns() { + if [ $EUID -eq 0 ]; then + nsenter -t $PID -F -n -- "$@" + else + nsenter -t $PID -F -U --preserve-credentials -n -- "$@" + fi +} + +function exec_lxc() { + sudo lxc exec $NAME -- sudo --login --user ubuntu "$@" +} \ No newline at end of file