From 5f2c5fcb1046281ccd97873b5c3f8c5871884b15 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 26 Oct 2023 14:47:15 +0900 Subject: [PATCH 01/55] enable debug mode in bypass4netns when debug mode enabled Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netnsd/bypass4netnsd.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index a6501cd..1efc779 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -45,6 +45,10 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { logger.Info("Starting bypass") b4nnArgs := []string{} + if logger.Logger.GetLevel() == logrus.DebugLevel { + b4nnArgs = append(b4nnArgs, "--debug") + } + if spec.SocketPath != "" { socketOption := fmt.Sprintf("--socket=%s", spec.SocketPath) b4nnArgs = append(b4nnArgs, socketOption) From 45de5071a27998601a6bf106fd80d3b810a5e23a Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 26 Oct 2023 15:58:38 +0900 Subject: [PATCH 02/55] handle fcntl(2) and apply commands apt(8) configures sockets to be non-blocking via fcntl. This patch records fcntl's F_SETFD, F_SETFL and applies them on created sockets. 
Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 22 ++++++++++-- pkg/bypass4netns/socket.go | 62 +++++++++++++++++++++++++++----- pkg/oci/oci.go | 2 +- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index f6de96e..3ec7b60 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -593,6 +593,21 @@ func (h *notifHandler) handleSysSetsockopt(ctx *context) { } } +func (h *notifHandler) handleSysFcntl(ctx *context) { + logger := logrus.WithFields(logrus.Fields{"syscall": "fcntl", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) + logger.Debugf("handle") + fcntlCmd := ctx.req.Data.Args[1] + switch fcntlCmd { + case unix.F_SETFD: // 0x2 + case unix.F_SETFL: // 0x4 + h.socketInfo.recordFcntl(ctx, logger) + case unix.F_GETFL: // 0x3 + // ignore these + default: + logger.Warnf("Unknown fcntl command 0x%x ignored.", fcntlCmd) + } +} + // handleReq handles seccomp notif requests and configures responses. 
func (h *notifHandler) handleReq(ctx *context) { syscallName, err := ctx.req.Data.Syscall.GetName() @@ -619,6 +634,8 @@ func (h *notifHandler) handleReq(ctx *context) { h.handleSysSendto(ctx) case "setsockopt": h.handleSysSetsockopt(ctx) + case "fcntl": + h.handleSysFcntl(ctx) default: logrus.Errorf("Unknown syscall %q", syscallName) // TODO: error handle @@ -744,8 +761,9 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState state: state, forwardingPorts: map[int]ForwardPortMapping{}, socketInfo: socketInfo{ - options: map[string][]socketOption{}, - status: map[string]socketStatus{}, + options: map[string][]socketOption{}, + fcntlOptions: map[string][]fcntlOption{}, + status: map[string]socketStatus{}, }, } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 0849cd8..5cee9d4 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -15,6 +15,12 @@ type socketOption struct { optlen uint64 } +// Handle F_SETFL, F_SETFD options +type fcntlOption struct { + cmd uint64 + value uint64 +} + type socketState int const ( @@ -34,23 +40,34 @@ type socketStatus struct { } type socketInfo struct { - options map[string][]socketOption - status map[string]socketStatus + options map[string][]socketOption + fcntlOptions map[string][]fcntlOption + status map[string]socketStatus } // configureSocket set recorded socket options. 
func (info *socketInfo) configureSocket(ctx *context, sockfd int) error { key := fmt.Sprintf("%d:%d", ctx.req.Pid, ctx.req.Data.Args[0]) optValues, ok := info.options[key] - if !ok { - return nil + if ok { + for _, optVal := range optValues { + _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) + if errno != 0 { + return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) + } + logrus.Debugf("configured socket option pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, optVal) + } } - for _, optVal := range optValues { - _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) - if errno != 0 { - return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) + + fcntlValues, ok := info.fcntlOptions[key] + if ok { + for _, fcntlVal := range fcntlValues { + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(sockfd), uintptr(fcntlVal.cmd), uintptr(fcntlVal.value)) + if errno != 0 { + return fmt.Errorf("fnctl failed(%v): %s", fcntlVal, errno) + } + logrus.Debugf("configured socket fcntl pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, fcntlVal) } - logrus.Debugf("configured socket option pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, optVal) } return nil @@ -85,6 +102,28 @@ func (info *socketInfo) recordSocketOption(ctx *context, logger *logrus.Entry) e return nil } +// recordSocketOption records socket option. 
+func (info *socketInfo) recordFcntl(ctx *context, logger *logrus.Entry) error { + sockfd := ctx.req.Data.Args[0] + cmd := ctx.req.Data.Args[1] + value := ctx.req.Data.Args[2] + + key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) + _, ok := info.fcntlOptions[key] + if !ok { + info.fcntlOptions[key] = make([]fcntlOption, 0) + } + + option := fcntlOption{ + cmd: cmd, + value: value, + } + info.fcntlOptions[key] = append(info.fcntlOptions[key], option) + + logger.Debugf("recorded fcntl sockfd=%d cmd=%d value=%d", sockfd, cmd, value) + return nil +} + // deleteSocketOptions delete recorded socket options and status func (info *socketInfo) deleteSocket(ctx *context, logger *logrus.Entry) { sockfd := ctx.req.Data.Args[0] @@ -94,6 +133,11 @@ func (info *socketInfo) deleteSocket(ctx *context, logger *logrus.Entry) { delete(info.options, key) logger.Debugf("removed socket options") } + _, ok = info.fcntlOptions[key] + if ok { + delete(info.fcntlOptions, key) + logger.Debugf("removed fcntl options") + } status, ok := info.status[key] if ok { diff --git a/pkg/oci/oci.go b/pkg/oci/oci.go index 495d5b6..ce6069d 100644 --- a/pkg/oci/oci.go +++ b/pkg/oci/oci.go @@ -11,7 +11,7 @@ const ( SocketName = "bypass4netns.sock" ) -var SyscallsToBeNotified = []string{"bind", "close", "connect", "sendmsg", "sendto", "setsockopt"} +var SyscallsToBeNotified = []string{"bind", "close", "connect", "sendmsg", "sendto", "setsockopt", "fcntl"} func GetDefaultSeccompProfile(listenerPath string) *specs.LinuxSeccomp { tmpl := specs.LinuxSeccomp{ From 95b81903d7e2ced3e8022e7a664d7606400b8ef6 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sun, 5 Nov 2023 14:13:35 +0000 Subject: [PATCH 03/55] refactored bypass4netns Re-constructed entire data format. Currently, only SOCK_STREAM socket is handled. 
Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 492 +++++-------------------------- pkg/bypass4netns/sockaddr.go | 12 +- pkg/bypass4netns/socket.go | 296 ++++++++++++++----- pkg/oci/oci.go | 2 +- 4 files changed, 305 insertions(+), 497 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 3ec7b60..dc6a94f 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -4,9 +4,7 @@ package bypass4netns // The code is licensed under Apache-2.0 License import ( - "bytes" gocontext "context" - "encoding/binary" "encoding/json" "errors" "fmt" @@ -179,38 +177,6 @@ func getSocketArgs(sockfd int) (int, int, int, error) { return sock_domain, sock_type, sock_protocol, nil } -// duplicateSocketOnHost duplicate socket in other process to socket on host. -// retun values are (duplicated socket fd, target socket fd in current process, error) -func duplicateSocketOnHost(pid int, _sockfd int) (int, int, error) { - sockfd, err := getFdInProcess(pid, _sockfd) - if err != nil { - return 0, 0, fmt.Errorf("failed to get fd %s", err) - } - - sock_domain, sock_type, sock_protocol, err := getSocketArgs(sockfd) - if err != nil { - return 0, 0, fmt.Errorf("failed to get socket args %s", err) - } - - switch sock_domain { - case syscall.AF_INET, syscall.AF_INET6: - default: - return 0, 0, fmt.Errorf("expected AF_INET or AF_INET6, got %d", sock_domain) - } - - // only SOCK_STREAM and SOCK_DGRAM are acceptable. 
- if sock_type != syscall.SOCK_STREAM && sock_type != syscall.SOCK_DGRAM { - return 0, 0, fmt.Errorf("SOCK_STREAM and SOCK_DGRAM are supported") - } - - sockfd2, err := syscall.Socket(sock_domain, sock_type, sock_protocol) - if err != nil { - return 0, 0, fmt.Errorf("socket failed: %s", err) - } - - return sockfd2, sockfd, nil -} - func readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockaddr, error) { buf, err := readProcMem(pid, offset, addrlen) if err != nil { @@ -219,423 +185,125 @@ func readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockad return newSockaddr(buf) } -// manageSocket manages socketStatus and return next injecting file descriptor -// return values are (continue?, injecting fd) -func (h *notifHandler) manageSocket(destAddr net.IP, pid int, sockfd int, logger *logrus.Entry) (bool, int) { - destIsIgnored := h.nonBypassable.Contains(destAddr) - key := fmt.Sprintf("%d:%d", pid, sockfd) - sockStatus, ok := h.socketInfo.status[key] +func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, error) { + logger := logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}) + proc, ok := h.processes[pid] if !ok { - if destIsIgnored { - // the socket has never been bypassed and no need to bypass - logger.Debugf("%s is ignored, skipping.", destAddr.String()) - return false, 0 - } else { - // the socket has never been bypassed and need to bypass - sockfd2, sockfd, err := duplicateSocketOnHost(pid, sockfd) - if err != nil { - logger.Errorf("duplicating socket failed: %s", err) - return false, 0 - } - - sockStatus := socketStatus{ - state: Bypassed, - fdInNetns: sockfd, - fdInHost: sockfd2, - } - h.socketInfo.status[key] = sockStatus - logger.Debugf("start to bypass fdInHost=%d fdInNetns=%d", sockStatus.fdInHost, sockStatus.fdInNetns) - return true, sockfd2 - } - } else { - if sockStatus.state == Bypassed { - if !destIsIgnored { - // the socket has been bypassed and continue to be bypassed - 
logger.Debugf("continue to bypass") - return false, 0 - } else { - // the socket has been bypassed and need to switch back to socket in netns - logger.Debugf("switchback fdInHost(%d) -> fdInNetns(%d)", sockStatus.fdInHost, sockStatus.fdInNetns) - sockStatus.state = SwitchBacked - - h.socketInfo.status[key] = sockStatus - return true, sockStatus.fdInNetns - } - } else if sockStatus.state == SwitchBacked { - if destIsIgnored { - // the socket has been switchbacked(not bypassed) and no need to be bypassed - logger.Debugf("continue not bypassing") - return false, 0 - } else { - // the socket has been switchbacked(not bypassed) and need to bypass again - logger.Debugf("bypass again fdInNetns(%d) -> fdInHost(%d)", sockStatus.fdInNetns, sockStatus.fdInHost) - sockStatus.state = Bypassed - - h.socketInfo.status[key] = sockStatus - return true, sockStatus.fdInHost - } - } else { - panic(fmt.Errorf("unexpected state :%d", sockStatus.state)) - } + proc = newProcessStatus() + h.processes[pid] = proc + logger.Info("process is registered") } -} -// handleSysBind handles syscall bind(2). -// If binding port is the target of port-forwarding, -// it creates and configures including bind(2) a socket on host. -// Then, handler replaces container's socket to created one. 
-func (h *notifHandler) handleSysBind(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "bind", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return + sock, ok := proc.sockets[sockfd] + if ok { + logger.Info("socket is already registered") + return sock, nil } - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // TODO: get port-fowrad mapping from nerdctl - fwdPort, ok := h.forwardingPorts[int(sa.Port)] - if !ok { - logger.Infof("port=%d is not target of port forwarding.", sa.Port) - return - } - - sockfd2, sockfd, err := duplicateSocketOnHost(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) + sockFdHost, err := getFdInProcess(int(pid), sockfd) if err != nil { - logger.Errorf("duplicating socket failed: %s", err) - return + return nil, err } - defer syscall.Close(sockfd) - defer syscall.Close(sockfd2) + defer syscall.Close(sockFdHost) - err = h.socketInfo.configureSocket(ctx, sockfd2) + sockDomain, sockType, sockProtocol, err := getSocketArgs(sockFdHost) + sock = newSocketStatus(pid, sockfd, sockDomain, sockType, sockProtocol) if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) - return - } - - var bind_addr syscall.Sockaddr - - switch sa.Family { - case syscall.AF_INET: - var addr [4]byte - for i := 0; i < 4; i++ { - addr[i] = sa.IP[i] - } - bind_addr = &syscall.SockaddrInet4{ - Port: fwdPort.HostPort, - Addr: addr, - } - case syscall.AF_INET6: - var addr [16]byte - for i := 0; i < 16; i++ { - addr[i] = sa.IP[i] - } - bind_addr = &syscall.SockaddrInet6{ - Port: fwdPort.HostPort, - ZoneId: sa.ScopeID, - Addr: addr, + // non-socket fd is not bypassable + sock.state = NotBypassable + } else { + if sockDomain != syscall.AF_INET && sockDomain != syscall.AF_INET6 { + // non IP sockets are not handled. 
+ sock.state = NotBypassable + } else if sockType != syscall.SOCK_STREAM { + // only accepting TCP socket + sock.state = NotBypassable + } else { + // only newly created socket is allowed. + _, err := syscall.Getpeername(sockFdHost) + if err == nil { + logger.Infof("socket is already connected. socket is created via accept or forked") + sock.state = NotBypassable + } } } - err = syscall.Bind(sockfd2, bind_addr) - if err != nil { - logger.Errorf("bind failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } + proc.sockets[sockfd] = sock + logger.Infof("socket is registered (state=%s)", sock.state) - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) - return - } - - logger.Infof("binding for %d:%d is done", fwdPort.HostPort, fwdPort.ChildPort) - - ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) -} - -// handleSysClose handles `close(2)` and delete recorded socket options. -func (h *notifHandler) handleSysClose(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "close", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - logger.Trace("handle") - h.socketInfo.deleteSocket(ctx, logger) + return sock, nil } -// handleSysConnect handles syscall connect(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. 
-func (h *notifHandler) handleSysConnect(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "connect", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return - } - - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - - if !cont { - return - } - defer syscall.Close(sockfd2) - - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) - return +func (h *notifHandler) getSocket(pid uint32, sockfd int) *socketStatus { + proc, ok := h.processes[pid] + if !ok { + return nil } + sock := proc.sockets[sockfd] + return sock } -type msgHdrName struct { - Name uint64 - Namelen uint32 -} - -// handleSysSendto handles syscall sendmsg(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. -// This handles only SOCK_DGRAM sockets. 
-func (h *notifHandler) handleSysSendmsg(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "sendmsg", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - msghdr := msgHdrName{} - buf, err := readProcMem(ctx.req.Pid, ctx.req.Data.Args[1], 12) - if err != nil { - logger.Errorf("failed readProcMem pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) - } - - reader := bytes.NewReader(buf) - err = binary.Read(reader, binary.LittleEndian, &msghdr) - if err != nil { - logger.Errorf("cannnot cast byte array to Msghdr: %s", err) - } - - // addrlen == 0 means the socket is already connected - if msghdr.Namelen == 0 { - return - } - - sockfd, err := getFdInProcess(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) - if err != nil { - logger.Errorf("failed to get fd: %s", err) - } - sock_domain, sock_type, _, err := getSocketArgs(sockfd) - - if err != nil { - logger.Errorf("failed to get socket args: %v", err) - return - } - - switch sock_domain { - case syscall.AF_INET, syscall.AF_INET6: - default: - logger.Debugf("only supported AF_INET, AF_INET6: %d", sock_domain) - return - } - - if sock_type != syscall.SOCK_DGRAM { - logger.Debug("only SOCK_DGRAM sockets are handled") - return - } - - addrOffset := uint64(msghdr.Name) - sa, err := readSockaddrFromProcess(ctx.req.Pid, addrOffset, uint64(msghdr.Namelen)) - if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) - return - } - - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - defer syscall.Close(sockfd2) - - if !cont { - return - } - - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("setsocketoptions failed: %s", err) - return - } - - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: 
uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, - } - - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) +func (h *notifHandler) removeSocket(pid uint32, sockfd int) { + defer logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}).Infof("socket is removed") + proc, ok := h.processes[pid] + if !ok { return } + delete(proc.sockets, sockfd) } -// handleSysSendto handles syscall sendto(2). -// If destination is outside of container network, -// it creates and configures a socket on host. -// Then, handler replaces container's socket to created one. -// This handles only SOCK_DGRAM sockets. -func (h *notifHandler) handleSysSendto(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "sendto", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - // addrlen == 0 is send(2) - if ctx.req.Data.Args[5] == 0 { - return - } - - sockfd, err := getFdInProcess(int(ctx.req.Pid), int(ctx.req.Data.Args[0])) - if err != nil { - logger.Errorf("failed to get fd: %s", err) - } - defer syscall.Close(sockfd) - sock_domain, sock_type, _, err := getSocketArgs(sockfd) - - if err != nil { - logger.Errorf("failed to get socket args: %v", err) - return - } - - if sock_domain != syscall.AF_INET { - logger.Debugf("only supported AF_INET: %d", sock_domain) - return - } - - if sock_type != syscall.SOCK_DGRAM { - logger.Debug("only SOCK_DGRAM sockets are handled") - return - } - - sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[4], ctx.req.Data.Args[5]) +// handleReq handles seccomp notif requests and configures responses. 
+func (h *notifHandler) handleReq(ctx *context) { + syscallName, err := ctx.req.Data.Syscall.GetName() if err != nil { - logger.Errorf("failed to read sockaddr from process: %v", err) + logrus.Errorf("Error decoding syscall %v(): %s", ctx.req.Data.Syscall, err) + // TODO: error handle return } + logrus.Tracef("Received syscall %q, pid %v, arch %q, args %+v", syscallName, ctx.req.Pid, ctx.req.Data.Arch, ctx.req.Data.Args) - logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) - - // Retrieve next injecting file descriptor - cont, sockfd2 := h.manageSocket(sa.IP, int(ctx.req.Pid), int(ctx.req.Data.Args[0]), logger) - defer syscall.Close(sockfd2) + ctx.resp.Flags |= SeccompUserNotifFlagContinue - if !cont { + // cleanup sockets when the process exit. + if syscallName == "_exit" || syscallName == "exit_group" { + delete(h.processes, ctx.req.Pid) + logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid}).Infof("process is removed") return } - // configure socket if switched - err = h.socketInfo.configureSocket(ctx, sockfd2) - if err != nil { - syscall.Close(sockfd2) - logger.Errorf("configure socketoptions failed: %s", err) + // remove socket when closed + if syscallName == "close" { + h.removeSocket(ctx.req.Pid, int(ctx.req.Data.Args[0])) return } - addfd := seccompNotifAddFd{ - id: ctx.req.ID, - flags: SeccompAddFdFlagSetFd, - srcfd: uint32(sockfd2), - newfd: uint32(ctx.req.Data.Args[0]), - newfdFlags: 0, + pid := ctx.req.Pid + sockfd := int(ctx.req.Data.Args[0]) + sock := h.getSocket(pid, sockfd) + if sock == nil { + sock, err = h.registerSocket(pid, sockfd) + if err != nil { + logrus.Errorf("failed to register socket pid %d sockfd %d: %s", pid, sockfd, err) + return + } } - err = addfd.ioctlNotifAddFd(ctx.notifFd) - if err != nil { - logger.Errorf("ioctl NotifAddFd failed: %s", err) + switch sock.state { + case NotBypassable, Bypassed: return - } -} - -// handleSyssetsockopt handles `setsockopt(2)` and records options. 
-// Recorded options are used in `handleSysConnect` or `handleSysBind` via `setSocketoptions` to configure created sockets. -func (h *notifHandler) handleSysSetsockopt(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "setsockopt", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - logger.Debugf("handle") - err := h.socketInfo.recordSocketOption(ctx, logger) - if err != nil { - logger.Errorf("recordSocketOption failed: %s", err) - } -} - -func (h *notifHandler) handleSysFcntl(ctx *context) { - logger := logrus.WithFields(logrus.Fields{"syscall": "fcntl", "pid": ctx.req.Pid, "sockfd": ctx.req.Data.Args[0]}) - logger.Debugf("handle") - fcntlCmd := ctx.req.Data.Args[1] - switch fcntlCmd { - case unix.F_SETFD: // 0x2 - case unix.F_SETFL: // 0x4 - h.socketInfo.recordFcntl(ctx, logger) - case unix.F_GETFL: // 0x3 - // ignore these default: - logger.Warnf("Unknown fcntl command 0x%x ignored.", fcntlCmd) - } -} - -// handleReq handles seccomp notif requests and configures responses. 
-func (h *notifHandler) handleReq(ctx *context) { - syscallName, err := ctx.req.Data.Syscall.GetName() - if err != nil { - logrus.Errorf("Error decoding syscall %v(): %s", ctx.req.Data.Syscall, err) - // TODO: error handle - return + // continue } - logrus.Tracef("Received syscall %q, pid %v, arch %q, args %+v", syscallName, ctx.req.Pid, ctx.req.Data.Arch, ctx.req.Data.Args) - - ctx.resp.Flags |= SeccompUserNotifFlagContinue switch syscallName { case "bind": - h.handleSysBind(ctx) - case "close": - // handling close(2) may cause performance degradation - h.handleSysClose(ctx) + sock.handleSysBind(h, ctx) case "connect": - h.handleSysConnect(ctx) - case "sendmsg": - h.handleSysSendmsg(ctx) - case "sendto": - h.handleSysSendto(ctx) + sock.handleSysConnect(h, ctx) case "setsockopt": - h.handleSysSetsockopt(ctx) + sock.handleSysSetsockopt(ctx) case "fcntl": - h.handleSysFcntl(ctx) + sock.handleSysFcntl(ctx) default: logrus.Errorf("Unknown syscall %q", syscallName) // TODO: error handle @@ -752,7 +420,9 @@ type notifHandler struct { nonBypassable *nonbypassable.NonBypassable nonBypassableAutoUpdate bool forwardingPorts map[int]ForwardPortMapping - socketInfo socketInfo + + // key is pid + processes map[uint32]*processStatus } func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { @@ -760,11 +430,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState fd: libseccomp.ScmpFd(fd), state: state, forwardingPorts: map[int]ForwardPortMapping{}, - socketInfo: socketInfo{ - options: map[string][]socketOption{}, - fcntlOptions: map[string][]fcntlOption{}, - status: map[string]socketStatus{}, - }, + processes: map[uint32]*processStatus{}, } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate diff --git a/pkg/bypass4netns/sockaddr.go b/pkg/bypass4netns/sockaddr.go index 2e7e70a..2250d99 100644 --- a/pkg/bypass4netns/sockaddr.go +++ 
b/pkg/bypass4netns/sockaddr.go @@ -16,6 +16,10 @@ type sockaddr struct { ScopeID uint32 // sin6_scope_id } +func (sa *sockaddr) String() string { + return fmt.Sprintf("%s:%d", sa.IP, sa.Port) +} + func newSockaddr(buf []byte) (*sockaddr, error) { sa := &sockaddr{} reader := bytes.NewReader(buf) @@ -34,9 +38,7 @@ func newSockaddr(buf []byte) (*sockaddr, error) { return nil, fmt.Errorf("cannot cast byte array to RawSockaddrInet4: %w", err) } sa.IP = make(net.IP, len(addr4.Addr)) - for i, x := range addr4.Addr { // nolint: gosimple - sa.IP[i] = x - } + copy(sa.IP, addr4.Addr[:]) p := make([]byte, 2) binary.BigEndian.PutUint16(p, addr4.Port) sa.Port = int(endian.Uint16(p)) @@ -49,9 +51,7 @@ func newSockaddr(buf []byte) (*sockaddr, error) { return nil, fmt.Errorf("cannot cast byte array to RawSockaddrInet6: %w", err) } sa.IP = make(net.IP, len(addr6.Addr)) - for i, x := range addr6.Addr { // nolint: gosimple - sa.IP[i] = x - } + copy(sa.IP, addr6.Addr[:]) p := make([]byte, 2) binary.BigEndian.PutUint16(p, addr6.Port) sa.Port = int(endian.Uint16(p)) diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 5cee9d4..45c4513 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -6,6 +6,7 @@ import ( "unsafe" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) type socketOption struct { @@ -24,58 +25,77 @@ type fcntlOption struct { type socketState int const ( + // NotBypassableSocket means that the fd is not socket or not bypassed + NotBypassable socketState = iota + + // NotBypassed means that the socket is not bypassed. + NotBypassed + // Bypassed means that the socket is replaced by one created on the host - Bypassed socketState = iota + Bypassed // SwitchBacked means that the socket was bypassed but now rereplaced to the socket in netns. // This state can be hannpend in connect(2), sendto(2) and sendmsg(2) // when connecting to a host outside of netns and then connecting to a host inside of netns with same fd. 
- SwitchBacked + //SwitchBacked ) -type socketStatus struct { - state socketState - fdInNetns int - fdInHost int +func (ss socketState) String() string { + switch ss { + case NotBypassable: + return "NotBypassable" + case NotBypassed: + return "NotBypassed" + case Bypassed: + return "Bypassed" + //case SwitchBacked: + // return "SwitchBacked" + default: + panic(fmt.Sprintf("unexpected enum %d: String() is not implmented", ss)) + } } -type socketInfo struct { - options map[string][]socketOption - fcntlOptions map[string][]fcntlOption - status map[string]socketStatus +type processStatus struct { + sockets map[int]*socketStatus } -// configureSocket set recorded socket options. -func (info *socketInfo) configureSocket(ctx *context, sockfd int) error { - key := fmt.Sprintf("%d:%d", ctx.req.Pid, ctx.req.Data.Args[0]) - optValues, ok := info.options[key] - if ok { - for _, optVal := range optValues { - _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) - if errno != 0 { - return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) - } - logrus.Debugf("configured socket option pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, optVal) - } +func newProcessStatus() *processStatus { + return &processStatus{ + sockets: map[int]*socketStatus{}, } +} - fcntlValues, ok := info.fcntlOptions[key] - if ok { - for _, fcntlVal := range fcntlValues { - _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(sockfd), uintptr(fcntlVal.cmd), uintptr(fcntlVal.value)) - if errno != 0 { - return fmt.Errorf("fnctl failed(%v): %s", fcntlVal, errno) - } - logrus.Debugf("configured socket fcntl pid=%d sockfd=%d (%v)", ctx.req.Pid, sockfd, fcntlVal) - } - } +type socketStatus struct { + state socketState + pid uint32 + sockfd int + sockDomain int + sockType int + sockProto int + // address for bind or connect + addr *sockaddr + socketOptions []socketOption + 
fcntlOptions []fcntlOption - return nil + logger *logrus.Entry +} + +func newSocketStatus(pid uint32, sockfd int, sockDomain, sockType, sockProto int) *socketStatus { + return &socketStatus{ + state: NotBypassed, + pid: pid, + sockfd: sockfd, + sockDomain: sockDomain, + sockType: sockType, + sockProto: sockProto, + socketOptions: []socketOption{}, + fcntlOptions: []fcntlOption{}, + logger: logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}), + } } -// recordSocketOption records socket option. -func (info *socketInfo) recordSocketOption(ctx *context, logger *logrus.Entry) error { - sockfd := ctx.req.Data.Args[0] +func (ss *socketStatus) handleSysSetsockopt(ctx *context) error { + ss.logger.Debug("handle setsockopt") level := ctx.req.Data.Args[1] optname := ctx.req.Data.Args[2] optlen := ctx.req.Data.Args[4] @@ -84,66 +104,188 @@ func (info *socketInfo) recordSocketOption(ctx *context, logger *logrus.Entry) e return fmt.Errorf("readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) } - key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) - _, ok := info.options[key] - if !ok { - info.options[key] = make([]socketOption, 0) - } - value := socketOption{ level: level, optname: optname, optval: optval, optlen: optlen, } - info.options[key] = append(info.options[key], value) + ss.socketOptions = append(ss.socketOptions, value) - logger.Debugf("recorded socket option sockfd=%d level=%d optname=%d optval=%v optlen=%d", sockfd, level, optname, optval, optlen) + ss.logger.Infof("setsockopt level=%d optname=%d optval=%v optlen=%d was recorded.", level, optname, optval, optlen) return nil } -// recordSocketOption records socket option. 
-func (info *socketInfo) recordFcntl(ctx *context, logger *logrus.Entry) error { - sockfd := ctx.req.Data.Args[0] - cmd := ctx.req.Data.Args[1] - value := ctx.req.Data.Args[2] +func (ss *socketStatus) handleSysFcntl(ctx *context) { + ss.logger.Debug("handle fcntl") + fcntlCmd := ctx.req.Data.Args[1] + switch fcntlCmd { + case unix.F_SETFD: // 0x2 + case unix.F_SETFL: // 0x4 + opt := fcntlOption{ + cmd: fcntlCmd, + value: ctx.req.Data.Args[2], + } + ss.fcntlOptions = append(ss.fcntlOptions, opt) + ss.logger.Infof("fcntl cmd=0x%x value=%d was recorded.", fcntlCmd, opt.value) + case unix.F_GETFL: // 0x3 + // ignore these + default: + ss.logger.Warnf("Unknown fcntl command 0x%x ignored.", fcntlCmd) + } +} + +func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { + destAddr, err := readSockaddrFromProcess(ss.pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + if err != nil { + ss.logger.Errorf("failed to read sockaddr from process: %q", err) + return + } + ss.addr = destAddr - key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) - _, ok := info.fcntlOptions[key] - if !ok { - info.fcntlOptions[key] = make([]fcntlOption, 0) + isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) + if isNotBypassed { + ss.logger.Infof("destination address %v is not bypassed.", destAddr.IP) + ss.state = NotBypassable + return } - option := fcntlOption{ - cmd: cmd, - value: value, + sockfdOnHost, err := syscall.Socket(ss.sockDomain, ss.sockType, ss.sockProto) + if err != nil { + ss.logger.Errorf("failed to create socket: %q", err) + ss.state = NotBypassable + return } - info.fcntlOptions[key] = append(info.fcntlOptions[key], option) + defer syscall.Close(sockfdOnHost) - logger.Debugf("recorded fcntl sockfd=%d cmd=%d value=%d", sockfd, cmd, value) - return nil + err = ss.configureSocket(sockfdOnHost) + if err != nil { + ss.logger.Errorf("failed to configure socket: %q", err) + ss.state = NotBypassable + return + } + + addfd := seccompNotifAddFd{ + id: ctx.req.ID, + 
flags: SeccompAddFdFlagSetFd, + srcfd: uint32(sockfdOnHost), + newfd: uint32(ctx.req.Data.Args[0]), + newfdFlags: 0, + } + + err = addfd.ioctlNotifAddFd(ctx.notifFd) + if err != nil { + ss.logger.Errorf("ioctl NotifAddFd failed: %q", err) + ss.state = NotBypassable + return + } + + ss.state = Bypassed + ss.logger.Infof("bypassed connect socket destAddr=%s", ss.addr) } -// deleteSocketOptions delete recorded socket options and status -func (info *socketInfo) deleteSocket(ctx *context, logger *logrus.Entry) { - sockfd := ctx.req.Data.Args[0] - key := fmt.Sprintf("%d:%d", ctx.req.Pid, sockfd) - _, ok := info.options[key] - if ok { - delete(info.options, key) - logger.Debugf("removed socket options") +func (ss *socketStatus) handleSysBind(handler *notifHandler, ctx *context) { + sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + if err != nil { + ss.logger.Errorf("failed to read sockaddr from process: %q", err) + ss.state = NotBypassable + return } - _, ok = info.fcntlOptions[key] - if ok { - delete(info.fcntlOptions, key) - logger.Debugf("removed fcntl options") + ss.addr = sa + + ss.logger.Infof("handle port=%d, ip=%v", sa.Port, sa.IP) + + // TODO: get port-fowrad mapping from nerdctl + fwdPort, ok := handler.forwardingPorts[int(sa.Port)] + if !ok { + ss.logger.Infof("port=%d is not target of port forwarding.", sa.Port) + ss.state = NotBypassable + return + } + + sockfdOnHost, err := syscall.Socket(ss.sockDomain, ss.sockType, ss.sockProto) + if err != nil { + ss.logger.Errorf("failed to create socket: %q", err) + ss.state = NotBypassable + return + } + defer syscall.Close(sockfdOnHost) + + err = ss.configureSocket(sockfdOnHost) + if err != nil { + ss.logger.Errorf("failed to configure socket: %q", err) + ss.state = NotBypassable + return } - status, ok := info.status[key] - if ok { - delete(info.status, key) - syscall.Close(status.fdInNetns) - syscall.Close(status.fdInHost) - logger.Debugf("removed socket status(fdInNetns=%d 
fdInHost=%d)", status.fdInNetns, status.fdInHost) + var bind_addr syscall.Sockaddr + + switch sa.Family { + case syscall.AF_INET: + var addr [4]byte + for i := 0; i < 4; i++ { + addr[i] = sa.IP[i] + } + bind_addr = &syscall.SockaddrInet4{ + Port: fwdPort.HostPort, + Addr: addr, + } + case syscall.AF_INET6: + var addr [16]byte + for i := 0; i < 16; i++ { + addr[i] = sa.IP[i] + } + bind_addr = &syscall.SockaddrInet6{ + Port: fwdPort.HostPort, + ZoneId: sa.ScopeID, + Addr: addr, + } + } + + err = syscall.Bind(sockfdOnHost, bind_addr) + if err != nil { + ss.logger.Errorf("bind failed: %s", err) + ss.state = NotBypassable + return } + + addfd := seccompNotifAddFd{ + id: ctx.req.ID, + flags: SeccompAddFdFlagSetFd, + srcfd: uint32(sockfdOnHost), + newfd: uint32(ctx.req.Data.Args[0]), + newfdFlags: 0, + } + + err = addfd.ioctlNotifAddFd(ctx.notifFd) + if err != nil { + ss.logger.Errorf("ioctl NotifAddFd failed: %s", err) + ss.state = NotBypassable + return + } + + ss.state = Bypassed + ss.logger.Infof("bypassed bind socket for %d:%d is done", fwdPort.HostPort, fwdPort.ChildPort) + + ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) +} + +func (ss *socketStatus) configureSocket(sockfd int) error { + for _, optVal := range ss.socketOptions { + _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) + if errno != 0 { + return fmt.Errorf("setsockopt failed(%v): %s", optVal, errno) + } + ss.logger.Debugf("configured socket option val=%v", optVal) + } + + for _, fcntlVal := range ss.fcntlOptions { + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(sockfd), uintptr(fcntlVal.cmd), uintptr(fcntlVal.value)) + if errno != 0 { + return fmt.Errorf("fnctl failed(%v): %s", fcntlVal, errno) + } + ss.logger.Debugf("configured socket fcntl val=%v", fcntlVal) + } + + return nil } diff --git a/pkg/oci/oci.go b/pkg/oci/oci.go index 
ce6069d..12e00a1 100644 --- a/pkg/oci/oci.go +++ b/pkg/oci/oci.go @@ -11,7 +11,7 @@ const ( SocketName = "bypass4netns.sock" ) -var SyscallsToBeNotified = []string{"bind", "close", "connect", "sendmsg", "sendto", "setsockopt", "fcntl"} +var SyscallsToBeNotified = []string{"bind", "close", "connect", "setsockopt", "fcntl", "_exit", "exit_group"} func GetDefaultSeccompProfile(listenerPath string) *specs.LinuxSeccomp { tmpl := specs.LinuxSeccomp{ From 1daeb04ec77777ed960125033bac24f8d7b987e2 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Mon, 6 Nov 2023 12:24:22 +0000 Subject: [PATCH 04/55] handling connections to bypassed socket When the listening socket is bypassed, processes in a container cannot connect to the socket with inner port. This patch handles connections to the published port and rewrites the destination address. TODO: return dummy destination address when getpeername(2) called. Assuming the below situation. When port 5201 is published as port 5202 (`-p 5202:5201`), other processes in the container try to connect to 127.0.0.1:5201 or interface's address (e.g. 10.4.0.38:5201). bypass4netns handles such connections and rewrites the destination address to 127.0.0.1:5202 or ::1:5202 Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 19 +++++ .../nonbypassable/nonbypassable.go | 21 +++++- pkg/bypass4netns/socket.go | 69 +++++++++++++++++-- 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index dc6a94f..ce689a9 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -85,6 +85,25 @@ func readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { return buffer[:size], nil } +// writeProcMem writes data to memory of specified pid process at the specified offset. 
+func writeProcMem(pid uint32, offset uint64, buf []byte) error { + memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_WRONLY, 0o777) + if err != nil { + return err + } + defer unix.Close(memfd) + + size, err := unix.Pwrite(memfd, buf, int64(offset)) + if err != nil { + return err + } + if len(buf) != size { + return fmt.Errorf("data is not written successfully. expected size=%d actual size=%d", len(buf), size) + } + + return nil +} + func handleNewMessage(sockfd int) (uintptr, *specs.ContainerProcessState, error) { const maxNameLen = 4096 stateBuf := make([]byte, maxNameLen) diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index ff6eab2..6a18a7e 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -31,6 +31,7 @@ func New(staticList []net.IPNet) *NonBypassable { type NonBypassable struct { staticList []net.IPNet dynamicList []net.IPNet + intefaceIPs []net.IP mu sync.RWMutex } @@ -45,6 +46,18 @@ func (x *NonBypassable) Contains(ip net.IP) bool { return false } +func (x *NonBypassable) IsInterfaceIPAddress(ip net.IP) bool { + x.mu.RLock() + defer x.mu.RUnlock() + for _, intfIP := range x.intefaceIPs { + if intfIP.Equal(ip) { + return true + } + } + + return false +} + // WatchNS watches the NS associated with the PID and updates the internal dynamic list on receiving SIGHUP. 
func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { selfExe, err := os.Executable() @@ -106,9 +119,10 @@ func (x *NonBypassable) watchNS(r io.Reader) { continue } var newList []net.IPNet + var newInterfaceIPs []net.IP for _, intf := range msg.Interfaces { for _, cidr := range intf.CIDRs { - _, ipNet, err := net.ParseCIDR(cidr) + ip, ipNet, err := net.ParseCIDR(cidr) if err != nil { logrus.WithError(err).Warnf("Dynamic non-bypassable list: Failed to parse nsagent message %q: %q: bad CIDR %q", line, intf.Name, cidr) continue @@ -116,11 +130,16 @@ func (x *NonBypassable) watchNS(r io.Reader) { if ipNet != nil { newList = append(newList, *ipNet) } + if !ip.IsLoopback() { + newInterfaceIPs = append(newInterfaceIPs, ip) + } } } x.mu.Lock() logrus.Infof("Dynamic non-bypassable list: old dynamic=%v, new dynamic=%v, static=%v", x.dynamicList, newList, x.staticList) + logrus.Infof("Interface's IP address list: %v", newInterfaceIPs) x.dynamicList = newList + x.intefaceIPs = newInterfaceIPs x.mu.Unlock() } if err := scanner.Err(); err != nil { diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 45c4513..ac5f3a4 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -1,7 +1,9 @@ package bypass4netns import ( + "encoding/binary" "fmt" + "net" "syscall" "unsafe" @@ -34,10 +36,8 @@ const ( // Bypassed means that the socket is replaced by one created on the host Bypassed - // SwitchBacked means that the socket was bypassed but now rereplaced to the socket in netns. - // This state can be hannpend in connect(2), sendto(2) and sendmsg(2) - // when connecting to a host outside of netns and then connecting to a host inside of netns with same fd. - //SwitchBacked + // Error happened after bypass. Nothing can be done to recover from this state. 
+ Error ) func (ss socketState) String() string { @@ -48,8 +48,8 @@ func (ss socketState) String() string { return "NotBypassed" case Bypassed: return "Bypassed" - //case SwitchBacked: - // return "SwitchBacked" + case Error: + return "Error" default: panic(fmt.Sprintf("unexpected enum %d: String() is not implmented", ss)) } @@ -143,8 +143,22 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } ss.addr = destAddr + // check wheter the destination is bypassed or not. + connectToLoopback := false + connectToInterface := false + fwdPort, ok := handler.forwardingPorts[int(destAddr.Port)] + if ok { + if destAddr.IP.IsLoopback() { + ss.logger.Infof("destination address %v is loopback and bypassed", destAddr) + connectToLoopback = true + } else if handler.nonBypassable.IsInterfaceIPAddress(destAddr.IP) { + ss.logger.Infof("destination address %v is interface's address and bypassed", destAddr) + connectToInterface = true + } + } + isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) - if isNotBypassed { + if !connectToLoopback && !connectToInterface && isNotBypassed { ss.logger.Infof("destination address %v is not bypassed.", destAddr.IP) ss.state = NotBypassable return @@ -180,6 +194,47 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { return } + if connectToLoopback || connectToInterface { + p := make([]byte, 2) + binary.BigEndian.PutUint16(p, uint16(fwdPort.HostPort)) + // writing host port at sock_addr's port offset + // TODO: should we return dummy value when getpeername(2) is called? 
+ err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+2, p) + if err != nil { + ss.logger.Errorf("failed to rewrite destination port: %q", err) + ss.state = Error + return + } + ss.logger.Infof("destination's port %d is rewritten to host-side port %d", ss.addr.Port, fwdPort.HostPort) + } + + if connectToInterface { + var addr net.IP + // writing host's loopback address to connect to bypassed socket at sock_addr's address offset + // TODO: should we return dummy value when getpeername(2) is called? + switch destAddr.Family { + case syscall.AF_INET: + // create loopback address "127.0.0.1" + addr = net.IPv4zero + addr[0] = 127 + addr[4] = 1 + err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, addr[0:4]) + case syscall.AF_INET6: + addr = net.IPv6loopback + err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, addr[0:16]) + default: + ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) + ss.state = Error + return + } + if err != nil { + ss.logger.Errorf("failed to rewrite destination address: %q", err) + ss.state = Error + return + } + ss.logger.Infof("destination address %s is rewritten to host loopback address %s", destAddr.IP, addr) + } + ss.state = Bypassed ss.logger.Infof("bypassed connect socket destAddr=%s", ss.addr) } From d285ff463c4886bcdf10c400010ccbeaab578774 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 7 Nov 2023 14:15:12 +0000 Subject: [PATCH 05/55] handling connections to other container's bypassed socket Normally, inter-container connections are handled by slirp4netns. When the listening socket is bypassed, other containers cannot connect to the socket. This patch handles such connections and rewrite destination address. 
TODO: care CNI-plugin's filter Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 14 +- cmd/bypass4netnsd/main.go | 67 +++++- cmd/bypass4netnsd/main_test.go | 76 +++++- pkg/api/com/api.go | 18 ++ pkg/api/com/client.go | 227 ++++++++++++++++++ pkg/api/com/router.go | 123 ++++++++++ pkg/bypass4netns/bypass4netns.go | 52 +++- .../nonbypassable/nonbypassable.go | 61 ++++- pkg/bypass4netns/nsagent/nsagent.go | 3 +- pkg/bypass4netns/nsagent/types/types.go | 5 +- pkg/bypass4netns/socket.go | 47 +++- pkg/bypass4netnsd/bypass4netnsd.go | 70 ++++-- pkg/util/util.go | 13 + 13 files changed, 721 insertions(+), 55 deletions(-) create mode 100644 pkg/api/com/api.go create mode 100644 pkg/api/com/client.go create mode 100644 pkg/api/com/router.go create mode 100644 pkg/util/util.go diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index b26b3d1..6c37bd1 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -22,11 +22,12 @@ import ( ) var ( - socketFile string - pidFile string - logFilePath string - readyFd int - exitFd int + socketFile string + comSocketFile string + pidFile string + logFilePath string + readyFd int + exitFd int ) func main() { @@ -37,6 +38,7 @@ func main() { } flag.StringVar(&socketFile, "socket", filepath.Join(xdgRuntimeDir, oci.SocketName), "Socket file") + flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.IntVar(&readyFd, "ready-fd", -1, "File descriptor to notify when ready") @@ -105,7 +107,7 @@ func main() { logrus.Infof("SocketPath: %s", socketFile) - handler := bypass4netns.NewHandler(socketFile) + handler := bypass4netns.NewHandler(socketFile, comSocketFile) subnets := []net.IPNet{} var subnetsAuto bool diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index 
3b1aa7c..ac050b9 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/gorilla/mux" + "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/api/daemon/router" "github.com/rootless-containers/bypass4netns/pkg/bypass4netnsd" pkgversion "github.com/rootless-containers/bypass4netns/pkg/version" @@ -20,10 +21,11 @@ import ( ) var ( - socketFile string - pidFile string - logFilePath string - b4nnPath string + socketFile string + comSocketFile string // socket for channel with bypass4netns + pidFile string + logFilePath string + b4nnPath string ) func main() { @@ -39,6 +41,7 @@ func main() { defaultB4nnPath := filepath.Join(filepath.Dir(exePath), "bypass4netns") flag.StringVar(&socketFile, "socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd.sock"), "Socket file") + flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.StringVar(&b4nnPath, "b4nn-executable", defaultB4nnPath, "Path to bypass4netns executable") @@ -75,6 +78,11 @@ func main() { } logrus.Infof("SocketPath: %s", socketFile) + if err := os.Remove(comSocketFile); err != nil && !errors.Is(err, os.ErrNotExist) { + logrus.Fatalf("Cannot cleanup communication socket file: %v", err) + } + logrus.Infof("CommunicationSocketPath: %s", comSocketFile) + if pidFile != "" { pid := fmt.Sprintf("%d", os.Getpid()) if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil { @@ -98,15 +106,34 @@ func main() { } logrus.Infof("bypass4netns executable path: %s", b4nnPath) - err = listenServeAPI(socketFile, &router.Backend{ - BypassDriver: bypass4netnsd.NewDriver(b4nnPath), - }) - if err != nil { - logrus.Fatalf("failed to serve API: %s", err) - } + b4nsdDriver := 
bypass4netnsd.NewDriver(b4nnPath, comSocketFile) + + waitChan := make(chan bool) + go func() { + err = listenServeNerdctlAPI(socketFile, &router.Backend{ + BypassDriver: b4nsdDriver, + }) + if err != nil { + logrus.Fatalf("failed to serve nerdctl API: %q", err) + } + waitChan <- true + }() + + go func() { + err = listenServeBypass4netnsAPI(comSocketFile, &com.Backend{ + BypassDriver: b4nsdDriver, + }) + if err != nil { + logrus.Fatalf("failed to serve bypass4netns: %q", err) + } + waitChan <- true + }() + + <-waitChan + logrus.Fatalf("process exited") } -func listenServeAPI(socketPath string, backend *router.Backend) error { +func listenServeNerdctlAPI(socketPath string, backend *router.Backend) error { r := mux.NewRouter() router.AddRoutes(r, backend) srv := &http.Server{Handler: r} @@ -118,6 +145,22 @@ func listenServeAPI(socketPath string, backend *router.Backend) error { if err != nil { return err } - logrus.Infof("Starting to serve on %s", socketPath) + logrus.Infof("Starting nerdctl API to serve on %s", socketPath) + return srv.Serve(l) +} + +func listenServeBypass4netnsAPI(sockPath string, backend *com.Backend) error { + r := mux.NewRouter() + com.AddRoutes(r, backend) + srv := &http.Server{Handler: r} + err := os.RemoveAll(sockPath) + if err != nil { + return err + } + l, err := net.Listen("unix", sockPath) + if err != nil { + return err + } + logrus.Infof("Starting bypass4netns API to serve on %s", sockPath) return srv.Serve(l) } diff --git a/cmd/bypass4netnsd/main_test.go b/cmd/bypass4netnsd/main_test.go index a6fa53f..bf28716 100644 --- a/cmd/bypass4netnsd/main_test.go +++ b/cmd/bypass4netnsd/main_test.go @@ -2,18 +2,20 @@ package main import ( "context" + "net" "os" "path/filepath" "syscall" "testing" "github.com/rootless-containers/bypass4netns/pkg/api" + "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/api/daemon/client" "github.com/stretchr/testify/assert" ) // Start bypass4netnsd before 
testing -func TestBypass4netnsd(t *testing.T) { +func TestNerdctlAPI(t *testing.T) { xdgRuntimeDir := os.Getenv("XDG_RUNTIME_DIR") if xdgRuntimeDir == "" { panic("$XDG_RUNTIME_DIR needs to be set") @@ -57,3 +59,75 @@ func isProcessRunning(pid int) bool { return err == nil } + +func TestBypass4netnsAPI(t *testing.T) { + xdgRuntimeDir := os.Getenv("XDG_RUNTIME_DIR") + if xdgRuntimeDir == "" { + panic("$XDG_RUNTIME_DIR needs to be set") + } + client, err := com.NewComClient(filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock")) + if err != nil { + t.Fatalf("failed to create ComClient %q", err) + } + + mac, err := net.ParseMAC("ea:1e:d5:cd:e2:ea") + assert.Equal(t, nil, err) + ip, ipNet, err := net.ParseCIDR("10.4.0.53/24") + assert.Equal(t, nil, err) + ipNet.IP = ip + cid := "c70ae35d2aeb4c98c5ef9eb4" + containerIf := com.ContainerInterfaces{ + ContainerID: cid, + Interfaces: []com.Interface{ + { + Name: "eth0", + HWAddr: mac, + Addresses: []net.IPNet{*ipNet}, + IsLoopback: false, + }, + }, + ForwardingPorts: map[int]int{ + 5201: 5202, + }, + } + err = client.Ping(context.TODO()) + assert.Equal(t, nil, err) + + ifs, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 0, len(ifs)) + + // this should be error + _, err = client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.NotEqual(t, nil, err) + + // Registering interface + postedIfs, err := client.PostInterface(context.TODO(), &containerIf) + assert.Equal(t, nil, err) + assert.Equal(t, postedIfs.ContainerID, containerIf.ContainerID) + assert.Equal(t, postedIfs.Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + + ifs2, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 1, len(ifs2)) + assert.Equal(t, ifs2[cid].ContainerID, containerIf.ContainerID) + assert.Equal(t, ifs2[cid].Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + assert.Equal(t, ifs2[cid].ForwardingPorts[5201], 5202) + + ifs3, err := 
client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.Equal(t, nil, err) + assert.Equal(t, ifs3.ContainerID, containerIf.ContainerID) + assert.Equal(t, ifs3.Interfaces[0].HWAddr, containerIf.Interfaces[0].HWAddr) + assert.Equal(t, ifs3.ForwardingPorts[5201], 5202) + + // Removing interface + err = client.DeleteInterface(context.TODO(), containerIf.ContainerID) + assert.Equal(t, nil, err) + + ifs4, err := client.ListInterfaces(context.TODO()) + assert.Equal(t, nil, err) + assert.Equal(t, 0, len(ifs4)) + + _, err = client.GetInterface(context.TODO(), containerIf.ContainerID) + assert.NotEqual(t, nil, err) +} diff --git a/pkg/api/com/api.go b/pkg/api/com/api.go new file mode 100644 index 0000000..5a3a916 --- /dev/null +++ b/pkg/api/com/api.go @@ -0,0 +1,18 @@ +package com + +import ( + "net" +) + +type ContainerInterfaces struct { + ContainerID string `json:"containerID"` + Interfaces []Interface `json:"interfaces"` + // key is "container-side" port, value is host-side port + ForwardingPorts map[int]int `json:"forwardingPorts"` +} +type Interface struct { + Name string `json:"name"` + HWAddr net.HardwareAddr `json:"hwAddr"` + Addresses []net.IPNet `json:"addresses"` + IsLoopback bool `json:"isLoopback"` +} diff --git a/pkg/api/com/client.go b/pkg/api/com/client.go new file mode 100644 index 0000000..6eae247 --- /dev/null +++ b/pkg/api/com/client.go @@ -0,0 +1,227 @@ +// This code is copied from https://github.com/rootless-containers/rootlesskit/blob/master/pkg/api/client/client.go v0.14.6 +// The code is licensed under Apache-2.0 + +package com + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + + "github.com/rootless-containers/bypass4netns/pkg/api" +) + +type ComClient struct { + client *http.Client + version string + dummyHost string +} + +func NewComClient(socketPath string) (*ComClient, error) { + if _, err := os.Stat(socketPath); err != nil { + return nil, err + } + hc := &http.Client{ + 
Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + var d net.Dialer + return d.DialContext(ctx, "unix", socketPath) + }, + }, + } + + return &ComClient{ + client: hc, + version: "v1", + dummyHost: "bypass4netnsd-com", + }, nil +} + +func readAtMost(r io.Reader, maxBytes int) ([]byte, error) { + lr := &io.LimitedReader{ + R: r, + N: int64(maxBytes), + } + b, err := io.ReadAll(lr) + if err != nil { + return b, err + } + if lr.N == 0 { + return b, fmt.Errorf("expected at most %d bytes, got more", maxBytes) + } + return b, nil +} + +// HTTPStatusErrorBodyMaxLength specifies the maximum length of HTTPStatusError.Body +const HTTPStatusErrorBodyMaxLength = 64 * 1024 + +// HTTPStatusError is created from non-2XX HTTP response +type HTTPStatusError struct { + // StatusCode is non-2XX status code + StatusCode int + // Body is at most HTTPStatusErrorBodyMaxLength + Body string +} + +// Error implements error. +// If e.Body is a marshalled string of api.ErrorJSON, Error returns ErrorJSON.Message . +// Otherwise Error returns a human-readable string that contains e.StatusCode and e.Body. 
+func (e *HTTPStatusError) Error() string { + if e.Body != "" && len(e.Body) < HTTPStatusErrorBodyMaxLength { + var ej api.ErrorJSON + if json.Unmarshal([]byte(e.Body), &ej) == nil { + return ej.Message + } + } + return fmt.Sprintf("unexpected HTTP status %s, body=%q", http.StatusText(e.StatusCode), e.Body) +} + +func successful(resp *http.Response) error { + if resp == nil { + return errors.New("nil response") + } + if resp.StatusCode/100 != 2 { + b, _ := readAtMost(resp.Body, HTTPStatusErrorBodyMaxLength) + return &HTTPStatusError{ + StatusCode: resp.StatusCode, + Body: string(b), + } + } + return nil +} + +func (c *ComClient) Ping(ctx context.Context) error { + m, err := json.Marshal("ping") + if err != nil { + return err + } + u := fmt.Sprintf("http://%s/%s/ping", c.dummyHost, c.version) + req, err := http.NewRequest("GET", u, bytes.NewReader(m)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return err + } + dec := json.NewDecoder(resp.Body) + var pong string + if err := dec.Decode(&pong); err != nil { + return err + } + + if pong != "pong" { + return fmt.Errorf("unexpected response expected=%q actual=%q", "pong", pong) + } + return nil +} + +func (c *ComClient) ListInterfaces(ctx context.Context) (map[string]ContainerInterfaces, error) { + u := fmt.Sprintf("http://%s/%s/interfaces", c.dummyHost, c.version) + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs map[string]ContainerInterfaces + if err := dec.Decode(&containerIfs); 
err != nil { + return nil, err + } + + return containerIfs, nil +} + +func (c *ComClient) GetInterface(ctx context.Context, id string) (*ContainerInterfaces, error) { + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, id) + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs ContainerInterfaces + if err := dec.Decode(&containerIfs); err != nil { + return nil, err + } + + return &containerIfs, nil +} + +func (c *ComClient) PostInterface(ctx context.Context, ifs *ContainerInterfaces) (*ContainerInterfaces, error) { + m, err := json.Marshal(ifs) + if err != nil { + return nil, err + } + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, ifs.ContainerID) + req, err := http.NewRequest("POST", u, bytes.NewReader(m)) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := successful(resp); err != nil { + return nil, err + } + dec := json.NewDecoder(resp.Body) + var containerIfs ContainerInterfaces + if err := dec.Decode(&containerIfs); err != nil { + return nil, err + } + + return &containerIfs, nil +} + +func (c *ComClient) DeleteInterface(ctx context.Context, id string) error { + u := fmt.Sprintf("http://%s/%s/interface/%s", c.dummyHost, c.version, id) + req, err := http.NewRequest("DELETE", u, nil) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req = req.WithContext(ctx) + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if err := 
successful(resp); err != nil { + return err + } + return nil +} diff --git a/pkg/api/com/router.go b/pkg/api/com/router.go new file mode 100644 index 0000000..0cbe188 --- /dev/null +++ b/pkg/api/com/router.go @@ -0,0 +1,123 @@ +package com + +import ( + "encoding/json" + "errors" + "net/http" + + "github.com/gorilla/mux" + "github.com/rootless-containers/bypass4netns/pkg/api" +) + +type Backend struct { + BypassDriver BypassDriver +} + +type BypassDriver interface { + ListInterfaces() map[string]ContainerInterfaces + GetInterface(id string) *ContainerInterfaces + PostInterface(id string, containerIfs *ContainerInterfaces) + DeleteInterface(id string) +} + +func AddRoutes(r *mux.Router, b *Backend) { + v1 := r.PathPrefix("/v1").Subrouter() + _ = v1 + v1.Path("/ping").Methods("GET").HandlerFunc(b.ping) + v1.Path("/interfaces").Methods("GET").HandlerFunc(b.listInterfaces) + v1.Path("/interface/{id}").Methods("GET").HandlerFunc(b.getInterface) + v1.Path("/interface/{id}").Methods("POST").HandlerFunc(b.postInterface) + v1.Path("/interface/{id}").Methods("DELETE").HandlerFunc(b.deleteInterface) +} + +func (b *Backend) onError(w http.ResponseWriter, r *http.Request, err error, ec int) { + w.WriteHeader(ec) + w.Header().Set("Content-Type", "application/json") + // it is safe to return the err to the client, because the client is reliable + e := api.ErrorJSON{ + Message: err.Error(), + } + _ = json.NewEncoder(w).Encode(e) +} + +func (b *Backend) ping(w http.ResponseWriter, r *http.Request) { + m, err := json.Marshal("pong") + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) listInterfaces(w http.ResponseWriter, r *http.Request) { + ifs := b.BypassDriver.ListInterfaces() + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + 
w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) getInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + ifs := b.BypassDriver.GetInterface(id) + if ifs == nil { + b.onError(w, r, errors.New("not found"), http.StatusNotFound) + return + } + + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) postInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + decoder := json.NewDecoder(r.Body) + var containerIfs ContainerInterfaces + if err := decoder.Decode(&containerIfs); err != nil { + b.onError(w, r, err, http.StatusBadRequest) + return + } + b.BypassDriver.PostInterface(id, &containerIfs) + + ifs := b.BypassDriver.GetInterface(id) + m, err := json.Marshal(ifs) + if err != nil { + b.onError(w, r, err, http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(m) +} + +func (b *Backend) deleteInterface(w http.ResponseWriter, r *http.Request) { + id, ok := mux.Vars(r)["id"] + if !ok { + b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) + return + } + + b.BypassDriver.DeleteInterface(id) +} diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index ce689a9..41b5718 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -10,9 +10,11 @@ import ( "fmt" "net" "syscall" + "time" "github.com/opencontainers/runtime-spec/specs-go" "github.com/oraoto/go-pidfd" + 
"github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nonbypassable" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" @@ -382,6 +384,7 @@ type ForwardPortMapping struct { type Handler struct { socketPath string + comSocketPath string ignoredSubnets []net.IPNet ignoredSubnetsAutoUpdate bool readyFd int @@ -391,9 +394,10 @@ type Handler struct { } // NewHandler creates new seccomp notif handler -func NewHandler(socketPath string) *Handler { +func NewHandler(socketPath, comSocketPath string) *Handler { handler := Handler{ socketPath: socketPath, + comSocketPath: comSocketPath, ignoredSubnets: []net.IPNet{}, forwardingPorts: map[int]ForwardPortMapping{}, readyFd: -1, @@ -442,6 +446,9 @@ type notifHandler struct { // key is pid processes map[uint32]*processStatus + + // key is container ID + containerInterfaces map[string]com.ContainerInterfaces } func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { @@ -503,5 +510,48 @@ func (h *Handler) StartHandle() { logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) go notifHandler.handle() + go notifHandler.startBackgroundTask(h.comSocketPath) + } +} + +func (h *notifHandler) startBackgroundTask(comSocketPath string) { + logrus.Info("Started bypass4netns background task") + comClient, err := com.NewComClient(comSocketPath) + if err != nil { + logrus.Fatalf("failed to create ComClient: %q", err) + } + err = comClient.Ping(gocontext.TODO()) + if err != nil { + logrus.Fatalf("failed to connect to bypass4netnsd: %q", err) + } + logrus.Infof("Successfully connected to bypass4netnsd") + ifLastUpdateUnix := int64(0) + for { + lastUpdated := h.nonBypassable.GetLastUpdateUnix() + if lastUpdated > ifLastUpdateUnix { + ifs := h.nonBypassable.GetInterfaces() + containerIfs := &com.ContainerInterfaces{ + ContainerID: h.state.State.ID, + Interfaces: ifs, + 
ForwardingPorts: map[int]int{}, + } + for _, v := range h.forwardingPorts { + containerIfs.ForwardingPorts[v.ChildPort] = v.HostPort + } + logrus.Infof("Interfaces = %v", containerIfs) + _, err = comClient.PostInterface(gocontext.TODO(), containerIfs) + if err != nil { + logrus.WithError(err).Errorf("failed to post interfaces") + } else { + logrus.Infof("successfully posted updated interfaces") + ifLastUpdateUnix = lastUpdated + } + } + h.containerInterfaces, err = comClient.ListInterfaces(gocontext.TODO()) + if err != nil { + logrus.WithError(err).Warn("failed to list container interfaces") + } + + time.Sleep(1 * time.Second) } } diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index 6a18a7e..206d350 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -13,7 +13,9 @@ import ( "os/signal" "strconv" "sync" + "time" + "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent/types" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -29,10 +31,11 @@ func New(staticList []net.IPNet) *NonBypassable { // NonBypassable maintains the list of the non-bypassable CIDRs, // such as 127.0.0.0/8 and CNI bridge CIDRs in the slirp's network namespace. 
type NonBypassable struct { - staticList []net.IPNet - dynamicList []net.IPNet - intefaceIPs []net.IP - mu sync.RWMutex + staticList []net.IPNet + dynamicList []net.IPNet + interfaces []com.Interface + lastUpdateUnix int64 + mu sync.RWMutex } func (x *NonBypassable) Contains(ip net.IP) bool { @@ -49,15 +52,30 @@ func (x *NonBypassable) Contains(ip net.IP) bool { func (x *NonBypassable) IsInterfaceIPAddress(ip net.IP) bool { x.mu.RLock() defer x.mu.RUnlock() - for _, intfIP := range x.intefaceIPs { - if intfIP.Equal(ip) { - return true + for _, intf := range x.interfaces { + for _, intfIP := range intf.Addresses { + if intfIP.IP.Equal(ip) { + return true + } } } return false } +func (x *NonBypassable) GetInterfaces() []com.Interface { + x.mu.RLock() + defer x.mu.RUnlock() + ips := append([]com.Interface{}, x.interfaces...) + return ips +} + +func (x *NonBypassable) GetLastUpdateUnix() int64 { + x.mu.RLock() + defer x.mu.RUnlock() + return x.lastUpdateUnix +} + // WatchNS watches the NS associated with the PID and updates the internal dynamic list on receiving SIGHUP. 
func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { selfExe, err := os.Executable() @@ -119,8 +137,13 @@ func (x *NonBypassable) watchNS(r io.Reader) { continue } var newList []net.IPNet - var newInterfaceIPs []net.IP + var newInterfaces []com.Interface for _, intf := range msg.Interfaces { + i := com.Interface{ + Name: intf.Name, + Addresses: make([]net.IPNet, 0), + IsLoopback: false, + } for _, cidr := range intf.CIDRs { ip, ipNet, err := net.ParseCIDR(cidr) if err != nil { @@ -130,16 +153,30 @@ func (x *NonBypassable) watchNS(r io.Reader) { if ipNet != nil { newList = append(newList, *ipNet) } - if !ip.IsLoopback() { - newInterfaceIPs = append(newInterfaceIPs, ip) + if ip.IsLoopback() { + i.IsLoopback = true + } + ifIPNet := net.IPNet{ + IP: ip, + Mask: ipNet.Mask, + } + i.Addresses = append(i.Addresses, ifIPNet) + } + if !i.IsLoopback { + var err error + i.HWAddr, err = net.ParseMAC(intf.HWAddr) + if err != nil { + logrus.WithError(err).Errorf("invalid hardware address %q ifName=%s is ignored", intf.HWAddr, intf.Name) } } + newInterfaces = append(newInterfaces, i) } x.mu.Lock() logrus.Infof("Dynamic non-bypassable list: old dynamic=%v, new dynamic=%v, static=%v", x.dynamicList, newList, x.staticList) - logrus.Infof("Interface's IP address list: %v", newInterfaceIPs) + logrus.Infof("Interface list: %v", newInterfaces) x.dynamicList = newList - x.intefaceIPs = newInterfaceIPs + x.interfaces = newInterfaces + x.lastUpdateUnix = time.Now().Unix() x.mu.Unlock() } if err := scanner.Err(); err != nil { diff --git a/pkg/bypass4netns/nsagent/nsagent.go b/pkg/bypass4netns/nsagent/nsagent.go index 1ff4d31..0dc68c4 100644 --- a/pkg/bypass4netns/nsagent/nsagent.go +++ b/pkg/bypass4netns/nsagent/nsagent.go @@ -47,7 +47,8 @@ func inspect(w io.Writer) error { continue } entry := types.Interface{ - Name: intf.Name, + Name: intf.Name, + HWAddr: intf.HardwareAddr.String(), } for _, addr := range addrs { if ipNet, ok := addr.(*net.IPNet); ok { diff --git 
a/pkg/bypass4netns/nsagent/types/types.go b/pkg/bypass4netns/nsagent/types/types.go index 62158b7..f9e5c36 100644 --- a/pkg/bypass4netns/nsagent/types/types.go +++ b/pkg/bypass4netns/nsagent/types/types.go @@ -5,6 +5,7 @@ type Message struct { } type Interface struct { - Name string `json:"name"` // "lo", "eth0", etc. - CIDRs []string `json:"cidrs"` // sorted as strings + Name string `json:"name"` // "lo", "eth0", etc. + HWAddr string `json:"hwAddr"` + CIDRs []string `json:"cidrs"` // sorted as strings } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index ac5f3a4..514e2ba 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -7,6 +7,7 @@ import ( "syscall" "unsafe" + "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -143,9 +144,10 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } ss.addr = destAddr - // check wheter the destination is bypassed or not. + // check whether the destination is bypassed or not. connectToLoopback := false connectToInterface := false + connectToOtherBypassedContainer := false fwdPort, ok := handler.forwardingPorts[int(destAddr.Port)] if ok { if destAddr.IP.IsLoopback() { @@ -157,8 +159,40 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } } + // check whether the destination container socket is bypassed or not. 
isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) - if !connectToLoopback && !connectToInterface && isNotBypassed { + if isNotBypassed { + ss.logger.Infof("container interfaces = %v", handler.containerInterfaces) + for k, v := range handler.containerInterfaces { + // ignore myself + if k == handler.state.State.ID { + continue + } + + // check destination port is bypassed or not + dstPort, ok := v.ForwardingPorts[int(destAddr.Port)] + if !ok { + continue + } + fwdPort.ChildPort = destAddr.Port + fwdPort.HostPort = dstPort + + // check destination container has the destination address + for _, intf := range v.Interfaces { + // ignore loopback interface + if intf.IsLoopback { + continue + } + for _, addr := range intf.Addresses { + if addr.IP.Equal(destAddr.IP) { + ss.logger.Infof("destination address %v is container %q address and bypassed", destAddr, util.ShrinkID(k)) + connectToOtherBypassedContainer = true + } + } + } + } + } + if !connectToLoopback && !connectToInterface && !connectToOtherBypassedContainer && isNotBypassed { ss.logger.Infof("destination address %v is not bypassed.", destAddr.IP) ss.state = NotBypassable return @@ -194,7 +228,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { return } - if connectToLoopback || connectToInterface { + if connectToLoopback || connectToInterface || connectToOtherBypassedContainer { p := make([]byte, 2) binary.BigEndian.PutUint16(p, uint16(fwdPort.HostPort)) // writing host port at sock_addr's port offset @@ -208,7 +242,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { ss.logger.Infof("destination's port %d is rewritten to host-side port %d", ss.addr.Port, fwdPort.HostPort) } - if connectToInterface { + if connectToInterface || connectToOtherBypassedContainer { var addr net.IP // writing host's loopback address to connect to bypassed socket at sock_addr's address offset // TODO: should we return dummy value when getpeername(2) is called? 
@@ -216,11 +250,13 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { case syscall.AF_INET: // create loopback address "127.0.0.1" addr = net.IPv4zero + addr = addr.To4() addr[0] = 127 - addr[4] = 1 + addr[3] = 1 err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, addr[0:4]) case syscall.AF_INET6: addr = net.IPv6loopback + addr = addr.To16() err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, addr[0:16]) default: ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) @@ -232,6 +268,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { ss.state = Error return } + ss.logger.Infof("destination address %s is rewritten to host loopback address %s", destAddr.IP, addr) } diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index 1efc779..e965482 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -10,21 +10,29 @@ import ( "time" "github.com/rootless-containers/bypass4netns/pkg/api" + "github.com/rootless-containers/bypass4netns/pkg/api/com" + "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) type Driver struct { BypassExecutablePath string + ComSocketPath string bypass map[string]api.BypassStatus lock sync.RWMutex + containerInterfaces map[string]com.ContainerInterfaces + interfacesLock sync.RWMutex } -func NewDriver(execPath string) *Driver { +func NewDriver(execPath string, comSocketPath string) *Driver { return &Driver{ BypassExecutablePath: execPath, + ComSocketPath: comSocketPath, bypass: map[string]api.BypassStatus{}, lock: sync.RWMutex{}, + containerInterfaces: map[string]com.ContainerInterfaces{}, + interfacesLock: sync.RWMutex{}, } } @@ -41,7 +49,7 @@ func (d *Driver) ListBypass() []api.BypassStatus { } func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { - logger := logrus.WithFields(logrus.Fields{"ID": shrinkID(spec.ID)}) 
+ logger := logrus.WithFields(logrus.Fields{"ID": util.ShrinkID(spec.ID)}) logger.Info("Starting bypass") b4nnArgs := []string{} @@ -72,6 +80,8 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { b4nnArgs = append(b4nnArgs, fmt.Sprintf("--ignore=%s", subnet)) } + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--com-socket=%s", d.ComSocketPath)) + // prepare pipe for ready notification readyR, readyW, err := os.Pipe() if err != nil { @@ -113,7 +123,7 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { } func (d *Driver) StopBypass(id string) error { - logger := logrus.WithFields(logrus.Fields{"ID": shrinkID(id)}) + logger := logrus.WithFields(logrus.Fields{"ID": util.ShrinkID(id)}) logger.Infof("Stopping bypass") d.lock.Lock() defer d.lock.Unlock() @@ -150,9 +160,51 @@ func (d *Driver) StopBypass(id string) error { delete(d.bypass, id) logger.Info("Stopped bypass") + // remove the container's interfaces + d.DeleteInterface(id) + return nil } +func (d *Driver) ListInterfaces() map[string]com.ContainerInterfaces { + d.interfacesLock.RLock() + defer d.interfacesLock.RUnlock() + + ifs := map[string]com.ContainerInterfaces{} + // copy map + for k := range d.containerInterfaces { + ifs[k] = d.containerInterfaces[k] + } + + return ifs +} + +func (d *Driver) GetInterface(id string) *com.ContainerInterfaces { + d.interfacesLock.RLock() + defer d.interfacesLock.RUnlock() + + ifs, ok := d.containerInterfaces[id] + if !ok { + return nil + } + + return &ifs +} + +func (d *Driver) PostInterface(id string, containerIfs *com.ContainerInterfaces) { + d.interfacesLock.Lock() + defer d.interfacesLock.Unlock() + + d.containerInterfaces[id] = *containerIfs +} + +func (d *Driver) DeleteInterface(id string) { + d.interfacesLock.Lock() + defer d.interfacesLock.Unlock() + + delete(d.containerInterfaces, id) +} + // waitForReady is from libpod // 
https://github.com/containers/libpod/blob/e6b843312b93ddaf99d0ef94a7e60ff66bc0eac8/libpod/networking_linux.go#L272-L308 func waitForReadyFD(cmdPid int, r *os.File) error { @@ -187,15 +239,3 @@ func waitForReadyFD(cmdPid int, r *os.File) error { } return nil } - -// shrinkID shrinks id to short(12 chars) id -// 6d9bcda7cebd551ddc9e3173d2139386e21b56b241f8459c950ef58e036f6bd8 -// to -// 6d9bcda7cebd -func shrinkID(id string) string { - if len(id) < 12 { - return id - } - - return id[0:12] -} diff --git a/pkg/util/util.go b/pkg/util/util.go new file mode 100644 index 0000000..d95c4de --- /dev/null +++ b/pkg/util/util.go @@ -0,0 +1,13 @@ +package util + +// shrinkID shrinks id to short(12 chars) id +// 6d9bcda7cebd551ddc9e3173d2139386e21b56b241f8459c950ef58e036f6bd8 +// to +// 6d9bcda7cebd +func ShrinkID(id string) string { + if len(id) < 12 { + return id + } + + return id[0:12] +} From f8a1501ff37ac722f3b8def79604bd7993e5c153 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 8 Nov 2023 15:22:41 +0000 Subject: [PATCH 06/55] check bypassable when connecting to bypassed socket bypass4netns bypasses sockets connecting to a bypassed socket, but this may break ACL or iptables configuration in the intermediate NetNS. The tracer agent checks whether the container can connect to the other container's port, and only connectable connections are bypassed. 
TODO: use RAW_SOCKET not to establish TCP connections Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 31 +++-- pkg/bypass4netns/bypass4netns.go | 84 +++++++++-- .../nonbypassable/nonbypassable.go | 17 +-- pkg/bypass4netns/socket.go | 33 +---- pkg/bypass4netns/tracer/agent.go | 112 +++++++++++++++ pkg/bypass4netns/tracer/tracer.go | 130 ++++++++++++++++++ pkg/util/util.go | 19 +++ 7 files changed, 363 insertions(+), 63 deletions(-) create mode 100644 pkg/bypass4netns/tracer/agent.go create mode 100644 pkg/bypass4netns/tracer/tracer.go diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index 6c37bd1..f6d7450 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -13,6 +13,7 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/bypass4netns" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" "github.com/rootless-containers/bypass4netns/pkg/oci" pkgversion "github.com/rootless-containers/bypass4netns/pkg/version" seccomp "github.com/seccomp/libseccomp-golang" @@ -49,6 +50,7 @@ func main() { version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide + tracerFlag := flag.Bool("tracer", false, "(An internal flag. 
Do not use manually.)") // TODO: hide // Parse arguments flag.Parse() @@ -76,6 +78,16 @@ func main() { os.Exit(0) } + if logFilePath != "" { + logFile, err := os.Create(logFilePath) + if err != nil { + logrus.Fatalf("Cannnot write log file %s : %v", logFilePath, err) + } + defer logFile.Close() + logrus.SetOutput(io.MultiWriter(os.Stderr, logFile)) + logrus.Infof("LogFilePath: %s", logFilePath) + } + if *nsagentFlag { if err := nsagent.Main(); err != nil { logrus.Fatal(err) @@ -83,6 +95,13 @@ func main() { os.Exit(0) } + if *tracerFlag { + if err := tracer.Main(); err != nil { + logrus.Fatal(err) + } + os.Exit(0) + } + if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) { logrus.Fatalf("Cannot cleanup socket file: %v", err) } @@ -95,19 +114,9 @@ func main() { logrus.Infof("PidFilePath: %s", pidFile) } - if logFilePath != "" { - logFile, err := os.Create(logFilePath) - if err != nil { - logrus.Fatalf("Cannnot write log file %s : %v", logFilePath, err) - } - defer logFile.Close() - logrus.SetOutput(io.MultiWriter(os.Stderr, logFile)) - logrus.Infof("LogFilePath: %s", logFilePath) - } - logrus.Infof("SocketPath: %s", socketFile) - handler := bypass4netns.NewHandler(socketFile, comSocketFile) + handler := bypass4netns.NewHandler(socketFile, comSocketFile, strings.Replace(logFilePath, ".log", "-tracer.log", -1)) subnets := []net.IPNet{} var subnetsAuto bool diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 41b5718..a2837f2 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -16,6 +16,7 @@ import ( "github.com/oraoto/go-pidfd" "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nonbypassable" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -385,6 +386,7 @@ type 
ForwardPortMapping struct { type Handler struct { socketPath string comSocketPath string + tracerAgentLogPath string ignoredSubnets []net.IPNet ignoredSubnetsAutoUpdate bool readyFd int @@ -394,13 +396,14 @@ type Handler struct { } // NewHandler creates new seccomp notif handler -func NewHandler(socketPath, comSocketPath string) *Handler { +func NewHandler(socketPath, comSocketPath, tracerAgentLogPath string) *Handler { handler := Handler{ - socketPath: socketPath, - comSocketPath: comSocketPath, - ignoredSubnets: []net.IPNet{}, - forwardingPorts: map[int]ForwardPortMapping{}, - readyFd: -1, + socketPath: socketPath, + comSocketPath: comSocketPath, + tracerAgentLogPath: tracerAgentLogPath, + ignoredSubnets: []net.IPNet{}, + forwardingPorts: map[int]ForwardPortMapping{}, + readyFd: -1, } return &handler @@ -447,8 +450,9 @@ type notifHandler struct { // key is pid processes map[uint32]*processStatus - // key is container ID - containerInterfaces map[string]com.ContainerInterfaces + // key is destination address e.g. 
"192.168.1.1:1000" + containerInterfaces map[string]int + tracer *tracer.Tracer } func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { @@ -457,6 +461,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState state: state, forwardingPorts: map[int]ForwardPortMapping{}, processes: map[uint32]*processStatus{}, + tracer: tracer.NewTracer(h.tracerAgentLogPath), } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate @@ -509,6 +514,37 @@ func (h *Handler) StartHandle() { logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) + + // prepare tracer agent + err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) + if err != nil { + logrus.WithError(err).Fatalf("failed to start tracer") + } + fwdPorts := []int{} + for _, v := range notifHandler.forwardingPorts { + fwdPorts = append(fwdPorts, v.ChildPort) + } + err = notifHandler.tracer.RegisterForwardPorts(fwdPorts) + if err != nil { + logrus.WithError(err).Fatalf("failed to register port") + } + logrus.Info("registered ports to tracer agent") + + // check tracer agent is ready + for _, v := range fwdPorts { + dst := fmt.Sprintf("127.0.0.1:%d", v) + addr, err := notifHandler.tracer.ConnectToAddress([]string{dst}) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", dst) + continue + } + if len(addr) != 1 || addr[0] != dst { + logrus.Warnf("failed to connect to %s", dst) + continue + } + logrus.Infof("successfully connected to %s", dst) + } + go notifHandler.handle() go notifHandler.startBackgroundTask(h.comSocketPath) } @@ -547,11 +583,41 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { ifLastUpdateUnix = lastUpdated } } - h.containerInterfaces, err = comClient.ListInterfaces(gocontext.TODO()) + containerInterfaces, err := comClient.ListInterfaces(gocontext.TODO()) if err != nil { 
logrus.WithError(err).Warn("failed to list container interfaces") } + containerIf := map[string]int{} + for _, cont := range containerInterfaces { + for contPort, hostPort := range cont.ForwardingPorts { + for _, intf := range cont.Interfaces { + if intf.IsLoopback { + continue + } + for _, addr := range intf.Addresses { + // ignore ipv6 address + if addr.IP.To4() == nil { + continue + } + dstAddr := fmt.Sprintf("%s:%d", addr.IP, contPort) + addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", dstAddr) + continue + } + if len(addrRes) != 1 || addrRes[0] != dstAddr { + logrus.Warnf("failed to connect to %s", dstAddr) + continue + } + logrus.Infof("successfully connected to %s", dstAddr) + containerIf[dstAddr] = hostPort + } + } + } + } + h.containerInterfaces = containerIf + time.Sleep(1 * time.Second) } } diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index 206d350..ef025e8 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -17,6 +17,7 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent/types" + "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -92,7 +93,7 @@ func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { "-n", } selfPid := os.Getpid() - ok, err := sameUserNS(pid, selfPid) + ok, err := util.SameUserNS(pid, selfPid) if err != nil { return fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) } @@ -185,17 +186,3 @@ func (x *NonBypassable) watchNS(r io.Reader) { } } } - -func sameUserNS(pidX, pidY int) (bool, error) { - nsX := fmt.Sprintf("/proc/%d/ns/user", pidX) - nsY := fmt.Sprintf("/proc/%d/ns/user", pidY) - nsXResolved, err := os.Readlink(nsX) - if err != nil { - return 
false, err - } - nsYResolved, err := os.Readlink(nsY) - if err != nil { - return false, err - } - return nsXResolved == nsYResolved, nil -} diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 514e2ba..947c6a1 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -7,7 +7,6 @@ import ( "syscall" "unsafe" - "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -163,33 +162,11 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) if isNotBypassed { ss.logger.Infof("container interfaces = %v", handler.containerInterfaces) - for k, v := range handler.containerInterfaces { - // ignore myself - if k == handler.state.State.ID { - continue - } - - // check destination port is bypassed or not - dstPort, ok := v.ForwardingPorts[int(destAddr.Port)] - if !ok { - continue - } - fwdPort.ChildPort = destAddr.Port - fwdPort.HostPort = dstPort - - // check destination container has the destination address - for _, intf := range v.Interfaces { - // ignore loopback interface - if intf.IsLoopback { - continue - } - for _, addr := range intf.Addresses { - if addr.IP.Equal(destAddr.IP) { - ss.logger.Infof("destination address %v is container %q address and bypassed", destAddr, util.ShrinkID(k)) - connectToOtherBypassedContainer = true - } - } - } + hostPort, ok := handler.containerInterfaces[destAddr.String()] + if ok { + ss.logger.Infof("destination address %v is container address and bypassed", destAddr) + fwdPort.HostPort = hostPort + connectToOtherBypassedContainer = true } } if !connectToLoopback && !connectToInterface && !connectToOtherBypassedContainer && isNotBypassed { diff --git a/pkg/bypass4netns/tracer/agent.go b/pkg/bypass4netns/tracer/agent.go new file mode 100644 index 0000000..f36c146 --- /dev/null +++ b/pkg/bypass4netns/tracer/agent.go @@ -0,0 +1,112 @@ +package tracer + +import ( 
+ "encoding/json" + "fmt" + "net" + "os" + "time" + + "github.com/sirupsen/logrus" +) + +type TracerCommand struct { + Cmd TracerCmd `json:"tracerCmd"` + ForwardingPorts []int `json:"forwardingPorts,omitempty"` + DestinationAddress []string `json:"destinationAddress,omitempty"` +} + +type TracerCmd int + +const ( + Ok TracerCmd = iota + RegisterForwardPorts + ConnectToAddress +) + +func Main() error { + r := os.Stdin + w := os.Stdout + dec := json.NewDecoder(r) + for { + var cmd TracerCommand + err := dec.Decode(&cmd) + if err != nil { + logrus.WithError(err).Errorf("failed to decode") + break + } + logrus.Infof("decoded = %v", cmd) + switch cmd.Cmd { + case RegisterForwardPorts: + for _, p := range cmd.ForwardingPorts { + readyChan := make(chan bool) + go func(port int, c chan bool) { + err := listenLoop(port, c) + if err != nil { + logrus.WithError(err).Errorf("failed to listen on port %d", port) + } + }(p, readyChan) + <-readyChan + } + cmd = TracerCommand{ + Cmd: Ok, + } + case ConnectToAddress: + addrs := []string{} + for i := range cmd.DestinationAddress { + addr := cmd.DestinationAddress[i] + err = tryToConnect(addr) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", addr) + continue + } + addrs = append(addrs, addr) + } + cmd = TracerCommand{ + Cmd: Ok, + DestinationAddress: addrs, + } + } + + m, err := json.Marshal(cmd) + if err != nil { + logrus.WithError(err).Errorf("failed to encode") + } + _, err = w.Write(m) + if err != nil { + logrus.WithError(err).Errorf("failed to write") + } + } + + logrus.Infof("Exit.") + return nil +} + +func listenLoop(port int, readyChan chan bool) error { + l, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + return err + } + defer l.Close() + + readyChan <- true + logrus.Infof("started to listen on port %d", port) + for { + conn, err := l.Accept() + if err != nil { + return err + } + conn.Close() + } +} + +func tryToConnect(addr string) error { + conn, err := 
net.DialTimeout("tcp", addr, 10*time.Millisecond) + if err != nil { + return err + } + defer conn.Close() + logrus.Infof("successfully connected to %s", addr) + + return nil +} diff --git a/pkg/bypass4netns/tracer/tracer.go b/pkg/bypass4netns/tracer/tracer.go new file mode 100644 index 0000000..164dd46 --- /dev/null +++ b/pkg/bypass4netns/tracer/tracer.go @@ -0,0 +1,130 @@ +package tracer + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "strconv" + + "github.com/rootless-containers/bypass4netns/pkg/util" + "golang.org/x/sys/unix" +) + +type Tracer struct { + logPath string + tracerCmd *exec.Cmd + reader io.Reader + writer io.Writer +} + +func NewTracer(logPath string) *Tracer { + return &Tracer{ + logPath: logPath, + } +} + +// StartTracer starts tracer in NS associated with the PID. +func (x *Tracer) StartTracer(ctx context.Context, pid int) error { + selfExe, err := os.Executable() + if err != nil { + return err + } + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(pid), + "-F", + "-n", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(pid, selfPid) + if err != nil { + return fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", selfExe, "--tracer", "--log-file", x.logPath) + x.tracerCmd = exec.CommandContext(ctx, nsenter, nsenterFlags...) 
+ x.tracerCmd.SysProcAttr = &unix.SysProcAttr{ + Pdeathsig: unix.SIGTERM, + } + x.tracerCmd.Stderr = os.Stderr + x.reader, x.tracerCmd.Stdout = io.Pipe() + x.tracerCmd.Stdin, x.writer = io.Pipe() + if err := x.tracerCmd.Start(); err != nil { + return fmt.Errorf("failed to start %v: %w", x.tracerCmd.Args, err) + } + return nil +} + +func (x *Tracer) RegisterForwardPorts(ports []int) error { + cmd := TracerCommand{ + Cmd: RegisterForwardPorts, + ForwardingPorts: ports, + } + + m, err := json.Marshal(cmd) + if err != nil { + return err + } + + writeSize, err := x.writer.Write(m) + if err != nil { + return err + } + if writeSize != len(m) { + return fmt.Errorf("unexpected written size expected=%d actual=%d", len(m), writeSize) + } + + dec := json.NewDecoder(x.reader) + var resp TracerCommand + err = dec.Decode(&resp) + if err != nil { + return fmt.Errorf("invalid response: %q", err) + } + + if resp.Cmd != Ok { + return fmt.Errorf("unexpected response: %d", resp.Cmd) + } + + return nil +} + +func (x *Tracer) ConnectToAddress(addrs []string) ([]string, error) { + cmd := TracerCommand{ + Cmd: ConnectToAddress, + DestinationAddress: addrs, + } + + m, err := json.Marshal(cmd) + if err != nil { + return nil, err + } + + writeSize, err := x.writer.Write(m) + if err != nil { + return nil, err + } + if writeSize != len(m) { + return nil, fmt.Errorf("unexpected written size expected=%d actual=%d", len(m), writeSize) + } + + dec := json.NewDecoder(x.reader) + var resp TracerCommand + err = dec.Decode(&resp) + if err != nil { + return nil, fmt.Errorf("invalid response: %q", err) + } + + if resp.Cmd != Ok { + return nil, fmt.Errorf("unexpected response: %d", resp.Cmd) + } + + return resp.DestinationAddress, nil +} diff --git a/pkg/util/util.go b/pkg/util/util.go index d95c4de..3f80de4 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -1,5 +1,10 @@ package util +import ( + "fmt" + "os" +) + // shrinkID shrinks id to short(12 chars) id // 
6d9bcda7cebd551ddc9e3173d2139386e21b56b241f8459c950ef58e036f6bd8 // to @@ -11,3 +16,17 @@ func ShrinkID(id string) string { return id[0:12] } + +func SameUserNS(pidX, pidY int) (bool, error) { + nsX := fmt.Sprintf("/proc/%d/ns/user", pidX) + nsY := fmt.Sprintf("/proc/%d/ns/user", pidY) + nsXResolved, err := os.Readlink(nsX) + if err != nil { + return false, err + } + nsYResolved, err := os.Readlink(nsY) + if err != nil { + return false, err + } + return nsXResolved == nsYResolved, nil +} From e091d5f46064a3180b300de782538b71c1bad4be Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 10 Nov 2023 02:25:11 +0000 Subject: [PATCH 07/55] set tracer check interval Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 21 ++++++++++++++++++--- pkg/bypass4netns/socket.go | 4 ++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index a2837f2..4e0fa0a 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -451,10 +451,16 @@ type notifHandler struct { processes map[uint32]*processStatus // key is destination address e.g. 
"192.168.1.1:1000" - containerInterfaces map[string]int + containerInterfaces map[string]containerInterface tracer *tracer.Tracer } +type containerInterface struct { + containerID string + hostPort int + lastCheckedUnix int64 +} + func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { notifHandler := notifHandler{ fd: libseccomp.ScmpFd(fd), @@ -588,7 +594,7 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { logrus.WithError(err).Warn("failed to list container interfaces") } - containerIf := map[string]int{} + containerIf := map[string]containerInterface{} for _, cont := range containerInterfaces { for contPort, hostPort := range cont.ForwardingPorts { for _, intf := range cont.Interfaces { @@ -601,6 +607,11 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { continue } dstAddr := fmt.Sprintf("%s:%d", addr.IP, contPort) + contIf, ok := h.containerInterfaces[dstAddr] + if ok && contIf.lastCheckedUnix+10 > time.Now().Unix() { + containerIf[dstAddr] = contIf + continue + } addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) if err != nil { logrus.WithError(err).Warnf("failed to connect to %s", dstAddr) @@ -611,7 +622,11 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { continue } logrus.Infof("successfully connected to %s", dstAddr) - containerIf[dstAddr] = hostPort + containerIf[dstAddr] = containerInterface{ + containerID: cont.ContainerID, + hostPort: hostPort, + lastCheckedUnix: time.Now().Unix(), + } } } } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 947c6a1..e81b80a 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -162,10 +162,10 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) if isNotBypassed { ss.logger.Infof("container interfaces = %v", handler.containerInterfaces) - hostPort, ok := 
handler.containerInterfaces[destAddr.String()] + contIf, ok := handler.containerInterfaces[destAddr.String()] if ok { ss.logger.Infof("destination address %v is container address and bypassed", destAddr) - fwdPort.HostPort = hostPort + fwdPort.HostPort = contIf.hostPort connectToOtherBypassedContainer = true } } From 07f342a3b7cd69eb6aa92346452e81b81765eec7 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 10 Nov 2023 05:18:40 +0000 Subject: [PATCH 08/55] adjust log level Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 23 +++++++++++-------- .../nonbypassable/nonbypassable.go | 1 - pkg/bypass4netns/socket.go | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 4e0fa0a..5748255 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -213,12 +213,12 @@ func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, er if !ok { proc = newProcessStatus() h.processes[pid] = proc - logger.Info("process is registered") + logger.Debug("process is registered") } sock, ok := proc.sockets[sockfd] if ok { - logger.Info("socket is already registered") + logger.Warn("socket is already registered") return sock, nil } @@ -251,7 +251,11 @@ func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, er } proc.sockets[sockfd] = sock - logger.Infof("socket is registered (state=%s)", sock.state) + if sock.state == NotBypassable { + logger.Debugf("socket is registered (state=%s)", sock.state) + } else { + logger.Infof("socket is registered (state=%s)", sock.state) + } return sock, nil } @@ -266,7 +270,7 @@ func (h *notifHandler) getSocket(pid uint32, sockfd int) *socketStatus { } func (h *notifHandler) removeSocket(pid uint32, sockfd int) { - defer logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}).Infof("socket is removed") + defer logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": 
sockfd}).Debugf("socket is removed") proc, ok := h.processes[pid] if !ok { return @@ -289,7 +293,7 @@ func (h *notifHandler) handleReq(ctx *context) { // cleanup sockets when the process exit. if syscallName == "_exit" || syscallName == "exit_group" { delete(h.processes, ctx.req.Pid) - logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid}).Infof("process is removed") + logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid}).Debugf("process is removed") return } @@ -534,7 +538,7 @@ func (h *Handler) StartHandle() { if err != nil { logrus.WithError(err).Fatalf("failed to register port") } - logrus.Info("registered ports to tracer agent") + logrus.WithField("fwdPorts", fwdPorts).Info("registered ports to tracer agent") // check tracer agent is ready for _, v := range fwdPorts { @@ -545,11 +549,12 @@ func (h *Handler) StartHandle() { continue } if len(addr) != 1 || addr[0] != dst { - logrus.Warnf("failed to connect to %s", dst) + logrus.Fatalf("failed to connect to %s", dst) continue } - logrus.Infof("successfully connected to %s", dst) + logrus.Debugf("successfully connected to %s", dst) } + logrus.Infof("tracer is ready") go notifHandler.handle() go notifHandler.startBackgroundTask(h.comSocketPath) @@ -621,7 +626,7 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { logrus.Warnf("failed to connect to %s", dstAddr) continue } - logrus.Infof("successfully connected to %s", dstAddr) + logrus.Debugf("successfully connected to %s", dstAddr) containerIf[dstAddr] = containerInterface{ containerID: cont.ContainerID, hostPort: hostPort, diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index ef025e8..7c3b430 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -174,7 +174,6 @@ func (x *NonBypassable) watchNS(r io.Reader) { } x.mu.Lock() logrus.Infof("Dynamic non-bypassable list: old dynamic=%v, new dynamic=%v, static=%v", x.dynamicList, 
newList, x.staticList) - logrus.Infof("Interface list: %v", newInterfaces) x.dynamicList = newList x.interfaces = newInterfaces x.lastUpdateUnix = time.Now().Unix() diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index e81b80a..5db0d21 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -112,7 +112,7 @@ func (ss *socketStatus) handleSysSetsockopt(ctx *context) error { } ss.socketOptions = append(ss.socketOptions, value) - ss.logger.Infof("setsockopt level=%d optname=%d optval=%v optlen=%d was recorded.", level, optname, optval, optlen) + ss.logger.Debugf("setsockopt level=%d optname=%d optval=%v optlen=%d was recorded.", level, optname, optval, optlen) return nil } @@ -127,7 +127,7 @@ func (ss *socketStatus) handleSysFcntl(ctx *context) { value: ctx.req.Data.Args[2], } ss.fcntlOptions = append(ss.fcntlOptions, opt) - ss.logger.Infof("fcntl cmd=0x%x value=%d was recorded.", fcntlCmd, opt.value) + ss.logger.Debugf("fcntl cmd=0x%x value=%d was recorded.", fcntlCmd, opt.value) case unix.F_GETFL: // 0x3 // ignore these default: From 2333fc647ff848c972ac58ef1b8320a7f5df7f6e Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 10 Nov 2023 08:00:45 +0000 Subject: [PATCH 09/55] handle getpeername(2) and rewrite address This is currently not utilized Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 6 ++++ pkg/bypass4netns/sockaddr.go | 33 +++++++++++++++++++ pkg/bypass4netns/sockaddr_test.go | 53 +++++++++++++++++++++++++++++++ pkg/bypass4netns/socket.go | 30 +++++++++++++++++ pkg/oci/oci.go | 2 +- 5 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 pkg/bypass4netns/sockaddr_test.go diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 5748255..fb0d240 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -314,6 +314,10 @@ func (h *notifHandler) handleReq(ctx *context) { } } + if syscallName == "getpeername" { +
sock.handleSysGetpeername(ctx) + } + switch sock.state { case NotBypassable, Bypassed: return @@ -330,6 +334,8 @@ func (h *notifHandler) handleReq(ctx *context) { sock.handleSysSetsockopt(ctx) case "fcntl": sock.handleSysFcntl(ctx) + case "getpeername": + // already handled default: logrus.Errorf("Unknown syscall %q", syscallName) // TODO: error handle diff --git a/pkg/bypass4netns/sockaddr.go b/pkg/bypass4netns/sockaddr.go index 2250d99..2837afc 100644 --- a/pkg/bypass4netns/sockaddr.go +++ b/pkg/bypass4netns/sockaddr.go @@ -62,3 +62,36 @@ func newSockaddr(buf []byte) (*sockaddr, error) { } return sa, nil } + +func (sa *sockaddr) toBytes() ([]byte, error) { + res := bytes.Buffer{} + // TODO: support big endian hosts + endian := binary.LittleEndian + + // ntohs + p := make([]byte, 2) + binary.BigEndian.PutUint16(p, uint16(sa.Port)) + + switch sa.Family { + case syscall.AF_INET: + addr4 := syscall.RawSockaddrInet4{} + addr4.Family = syscall.AF_INET + copy(addr4.Addr[:], sa.IP.To4()[:]) + + addr4.Port = endian.Uint16(p) + binary.Write(&res, endian, addr4) + case syscall.AF_INET6: + addr6 := syscall.RawSockaddrInet6{} + addr6.Family = syscall.AF_INET6 + copy(addr6.Addr[:], sa.IP.To16()[:]) + + addr6.Port = endian.Uint16(p) + addr6.Flowinfo = sa.Flowinfo + addr6.Scope_id = sa.ScopeID + binary.Write(&res, endian, addr6) + default: + return nil, fmt.Errorf("expected AF_INET or AF_INET6, got %d", sa.Family) + } + + return res.Bytes(), nil +} diff --git a/pkg/bypass4netns/sockaddr_test.go b/pkg/bypass4netns/sockaddr_test.go new file mode 100644 index 0000000..c48b555 --- /dev/null +++ b/pkg/bypass4netns/sockaddr_test.go @@ -0,0 +1,53 @@ +package bypass4netns + +import ( + "net" + "syscall" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSerializeDeserializeSockaddr4(t *testing.T) { + ip := net.ParseIP("192.168.1.100") + port := 12345 + + sa := sockaddr{ + IP: ip, + Port: port, + } + sa.Family = syscall.AF_INET + + saBytes, err := sa.toBytes() + 
assert.Equal(t, nil, err) + assert.Equal(t, 16, len(saBytes)) + + sa2, err := newSockaddr(saBytes) + assert.Equal(t, nil, err) + assert.Equal(t, ip.String(), sa2.IP.String()) + assert.Equal(t, port, sa2.Port) +} + +func TestSerializeDeserializeSockaddr6(t *testing.T) { + ip := net.ParseIP("2001:0db8::1:0:0:1") + port := 12345 + + sa := sockaddr{ + IP: ip, + Port: port, + Flowinfo: 0x12345678, + ScopeID: 0x9abcdef0, + } + sa.Family = syscall.AF_INET6 + + saBytes, err := sa.toBytes() + assert.Equal(t, nil, err) + assert.Equal(t, 28, len(saBytes)) + + sa2, err := newSockaddr(saBytes) + assert.Equal(t, nil, err) + assert.Equal(t, ip.String(), sa2.IP.String()) + assert.Equal(t, port, sa2.Port) + assert.Equal(t, sa.Flowinfo, uint32(0x12345678)) + assert.Equal(t, sa.ScopeID, uint32(0x9abcdef0)) +} diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 5db0d21..8fd22d7 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -339,6 +339,36 @@ func (ss *socketStatus) handleSysBind(handler *notifHandler, ctx *context) { ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) } +func (ss *socketStatus) handleSysGetpeername(ctx *context) { + if ss.addr == nil { + return + } + + buf, err := ss.addr.toBytes() + if err != nil { + ss.logger.WithError(err).Errorf("failed to serialize address %s", ss.addr) + return + } + + err = writeProcMem(ss.pid, ctx.req.Data.Args[1], buf) + if err != nil { + ss.logger.WithError(err).Errorf("failed to write address %s", ss.addr) + return + } + + bufLen := make([]byte, 4) + binary.LittleEndian.PutUint32(bufLen, uint32(len(buf))) + err = writeProcMem(ss.pid, ctx.req.Data.Args[2], bufLen) + if err != nil { + ss.logger.WithError(err).Errorf("failed to write address length %d", len(buf)) + return + } + + ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) + + ss.logger.Infof("rewrite getpeername() address to %s", ss.addr) +} + func (ss *socketStatus) configureSocket(sockfd int) error { for _, optVal := 
range ss.socketOptions { _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(sockfd), uintptr(optVal.level), uintptr(optVal.optname), uintptr(unsafe.Pointer(&optVal.optval[0])), uintptr(optVal.optlen), 0) diff --git a/pkg/oci/oci.go b/pkg/oci/oci.go index 12e00a1..9ea3b8c 100644 --- a/pkg/oci/oci.go +++ b/pkg/oci/oci.go @@ -11,7 +11,7 @@ const ( SocketName = "bypass4netns.sock" ) -var SyscallsToBeNotified = []string{"bind", "close", "connect", "setsockopt", "fcntl", "_exit", "exit_group"} +var SyscallsToBeNotified = []string{"bind", "close", "connect", "setsockopt", "fcntl", "_exit", "exit_group", "getpeername"} func GetDefaultSeccompProfile(listenerPath string) *specs.LinuxSeccomp { tmpl := specs.LinuxSeccomp{ From 13364d48571be6246666454dbe4cc2cb74c0270c Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Mon, 13 Nov 2023 15:49:39 +0000 Subject: [PATCH 10/55] allow bypass between non-routable subnets Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 3 +- cmd/bypass4netnsd/main.go | 6 +++ pkg/bypass4netns/bypass4netns.go | 77 +++++++++++++++++------------- pkg/bypass4netns/socket.go | 17 ++++--- pkg/bypass4netnsd/bypass4netnsd.go | 5 ++ 5 files changed, 64 insertions(+), 44 deletions(-) diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index f6d7450..90ef773 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -51,6 +51,7 @@ func main() { help := flag.Bool("help", false, "Show help") nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide tracerFlag := flag.Bool("tracer", false, "(An internal flag. 
Do not use manually.)") // TODO: hide + disableTracerFlag := flag.Bool("disable-tracer", false, "disable connection tracer") // Parse arguments flag.Parse() @@ -207,5 +208,5 @@ func main() { os.Exit(0) }() - handler.StartHandle() + handler.StartHandle(*disableTracerFlag) } diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index ac050b9..5559b2a 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -45,6 +45,7 @@ func main() { flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.StringVar(&b4nnPath, "b4nn-executable", defaultB4nnPath, "Path to bypass4netns executable") + disableTracer := flag.Bool("disable-tracer", false, "Disable connection tracer") debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") @@ -108,6 +109,11 @@ func main() { b4nsdDriver := bypass4netnsd.NewDriver(b4nnPath, comSocketFile) + if *disableTracer { + logrus.Info("Connection tracer is disabled") + b4nsdDriver.DisableTracer = *disableTracer + } + waitChan := make(chan bool) go func() { err = listenServeNerdctlAPI(socketFile, &router.Backend{ diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index fb0d240..f250064 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -463,6 +463,7 @@ type notifHandler struct { // key is destination address e.g. 
"192.168.1.1:1000" containerInterfaces map[string]containerInterface tracer *tracer.Tracer + disableTracer bool } type containerInterface struct { @@ -491,7 +492,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState } // StartHandle starts seccomp notif handler -func (h *Handler) StartHandle() { +func (h *Handler) StartHandle(disableTracer bool) { logrus.Info("Waiting for seccomp file descriptors") l, err := net.Listen("unix", h.socketPath) if err != nil { @@ -530,37 +531,42 @@ func (h *Handler) StartHandle() { logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) + notifHandler.disableTracer = disableTracer // prepare tracer agent - err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) - if err != nil { - logrus.WithError(err).Fatalf("failed to start tracer") - } - fwdPorts := []int{} - for _, v := range notifHandler.forwardingPorts { - fwdPorts = append(fwdPorts, v.ChildPort) - } - err = notifHandler.tracer.RegisterForwardPorts(fwdPorts) - if err != nil { - logrus.WithError(err).Fatalf("failed to register port") - } - logrus.WithField("fwdPorts", fwdPorts).Info("registered ports to tracer agent") - - // check tracer agent is ready - for _, v := range fwdPorts { - dst := fmt.Sprintf("127.0.0.1:%d", v) - addr, err := notifHandler.tracer.ConnectToAddress([]string{dst}) + if !notifHandler.disableTracer { + err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) if err != nil { - logrus.WithError(err).Warnf("failed to connect to %s", dst) - continue + logrus.WithError(err).Fatalf("failed to start tracer") } - if len(addr) != 1 || addr[0] != dst { - logrus.Fatalf("failed to connect to %s", dst) - continue + fwdPorts := []int{} + for _, v := range notifHandler.forwardingPorts { + fwdPorts = append(fwdPorts, v.ChildPort) + } + err = notifHandler.tracer.RegisterForwardPorts(fwdPorts) + if err != nil { + logrus.WithError(err).Fatalf("failed to register port") } - 
logrus.Debugf("successfully connected to %s", dst) + logrus.WithField("fwdPorts", fwdPorts).Info("registered ports to tracer agent") + + // check tracer agent is ready + for _, v := range fwdPorts { + dst := fmt.Sprintf("127.0.0.1:%d", v) + addr, err := notifHandler.tracer.ConnectToAddress([]string{dst}) + if err != nil { + logrus.WithError(err).Warnf("failed to connect to %s", dst) + continue + } + if len(addr) != 1 || addr[0] != dst { + logrus.Fatalf("failed to connect to %s", dst) + continue + } + logrus.Debugf("successfully connected to %s", dst) + } + logrus.Infof("tracer is ready") + } else { + logrus.Infof("tracer is disabled") } - logrus.Infof("tracer is ready") go notifHandler.handle() go notifHandler.startBackgroundTask(h.comSocketPath) @@ -623,14 +629,16 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { containerIf[dstAddr] = contIf continue } - addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) - if err != nil { - logrus.WithError(err).Warnf("failed to connect to %s", dstAddr) - continue - } - if len(addrRes) != 1 || addrRes[0] != dstAddr { - logrus.Warnf("failed to connect to %s", dstAddr) - continue + if !h.disableTracer { + addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) + if err != nil { + logrus.WithError(err).Debugf("failed to connect to %s", dstAddr) + continue + } + if len(addrRes) != 1 || addrRes[0] != dstAddr { + logrus.Debugf("failed to connect to %s", dstAddr) + continue + } } logrus.Debugf("successfully connected to %s", dstAddr) containerIf[dstAddr] = containerInterface{ @@ -638,6 +646,7 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { hostPort: hostPort, lastCheckedUnix: time.Now().Unix(), } + logrus.Debugf("%s -> 127.0.0.1:%d is registered", dstAddr, hostPort) } } } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 8fd22d7..ac1d00d 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -158,17 +158,16 @@ func (ss *socketStatus) 
handleSysConnect(handler *notifHandler, ctx *context) { } } + contIf, ok := handler.containerInterfaces[destAddr.String()] + if ok { + ss.logger.Infof("destination address %v is container address and bypassed", destAddr) + fwdPort.HostPort = contIf.hostPort + connectToOtherBypassedContainer = true + } + // check whether the destination container socket is bypassed or not. isNotBypassed := handler.nonBypassable.Contains(destAddr.IP) - if isNotBypassed { - ss.logger.Infof("container interfaces = %v", handler.containerInterfaces) - contIf, ok := handler.containerInterfaces[destAddr.String()] - if ok { - ss.logger.Infof("destination address %v is container address and bypassed", destAddr) - fwdPort.HostPort = contIf.hostPort - connectToOtherBypassedContainer = true - } - } + if !connectToLoopback && !connectToInterface && !connectToOtherBypassedContainer && isNotBypassed { ss.logger.Infof("destination address %v is not bypassed.", destAddr.IP) ss.state = NotBypassable diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index e965482..eb88cd1 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -23,6 +23,7 @@ type Driver struct { lock sync.RWMutex containerInterfaces map[string]com.ContainerInterfaces interfacesLock sync.RWMutex + DisableTracer bool } func NewDriver(execPath string, comSocketPath string) *Driver { @@ -33,6 +34,7 @@ func NewDriver(execPath string, comSocketPath string) *Driver { lock: sync.RWMutex{}, containerInterfaces: map[string]com.ContainerInterfaces{}, interfacesLock: sync.RWMutex{}, + DisableTracer: false, } } @@ -81,6 +83,9 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { } b4nnArgs = append(b4nnArgs, fmt.Sprintf("--com-socket=%s", d.ComSocketPath)) + if d.DisableTracer { + b4nnArgs = append(b4nnArgs, "--disable-tracer=true") + } // prepare pipe for ready notification readyR, readyW, err := os.Pipe() From 
ddefd309f08792c0b65e41a24ab1b34807701c4d Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 16 Nov 2023 06:13:59 +0000 Subject: [PATCH 11/55] introducing address-rewriting based overlay network TODO: allocate published port on host-side dynamically Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 34 +++++++++++--- cmd/bypass4netnsd/main.go | 28 +++++++++--- go.mod | 6 +++ go.sum | 14 ++++++ pkg/bypass4netns/bypass4netns.go | 72 ++++++++++++++++++++++++++++-- pkg/bypass4netns/socket.go | 72 +++++++++++++++++++++++------- pkg/bypass4netnsd/bypass4netnsd.go | 11 +++++ 7 files changed, 206 insertions(+), 31 deletions(-) diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index 90ef773..227eaf4 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -23,12 +23,14 @@ import ( ) var ( - socketFile string - comSocketFile string - pidFile string - logFilePath string - readyFd int - exitFd int + socketFile string + comSocketFile string + pidFile string + logFilePath string + overlayEtcd string + overlayHostAddress string + readyFd int + exitFd int ) func main() { @@ -42,6 +44,8 @@ func main() { flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") + flag.StringVar(&overlayEtcd, "overlay-etcd", "", "Etcd address for overlay network") + flag.StringVar(&overlayHostAddress, "overlay-host-address", "", "Host address for overlay network") flag.IntVar(&readyFd, "ready-fd", -1, "File descriptor to notify when ready") flag.IntVar(&exitFd, "exit-fd", -1, "File descriptor for terminating bypass4netns") ignoredSubnets := flag.StringSlice("ignore", []string{"127.0.0.0/8"}, "Subnets to ignore in bypass4netns. 
Can be also set to \"auto\".") @@ -52,6 +56,7 @@ func main() { nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide tracerFlag := flag.Bool("tracer", false, "(An internal flag. Do not use manually.)") // TODO: hide disableTracerFlag := flag.Bool("disable-tracer", false, "disable connection tracer") + overlayEnable := flag.Bool("overlay-enable", false, "Enable overlay network") // Parse arguments flag.Parse() @@ -103,6 +108,16 @@ func main() { os.Exit(0) } + if *overlayEnable { + if overlayEtcd == "" { + logrus.Fatal("--overlay-etcd is not specified") + } + if overlayHostAddress == "" { + logrus.Fatal("--overlay-host-address is not specified") + } + logrus.Infof("Overlay network is enabled. Etcd address is %q", overlayEtcd) + } + if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) { logrus.Fatalf("Cannot cleanup socket file: %v", err) } @@ -208,5 +223,10 @@ func main() { os.Exit(0) }() - handler.StartHandle(*disableTracerFlag) + overlay := &bypass4netns.OverlayConfig{ + Enable: *overlayEnable, + EtcdAddress: overlayEtcd, + HostAddress: overlayHostAddress, + } + handler.StartHandle(*disableTracerFlag, overlay) } diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index 5559b2a..9aeb5c4 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -21,11 +21,13 @@ import ( ) var ( - socketFile string - comSocketFile string // socket for channel with bypass4netns - pidFile string - logFilePath string - b4nnPath string + socketFile string + comSocketFile string // socket for channel with bypass4netns + pidFile string + logFilePath string + b4nnPath string + overlayEtcd string + overlayHostAddress string ) func main() { @@ -45,7 +47,10 @@ func main() { flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.StringVar(&b4nnPath, "b4nn-executable", defaultB4nnPath, "Path to bypass4netns executable") + 
flag.StringVar(&overlayEtcd, "overlay-etcd", "", "Etcd address for overlay network") + flag.StringVar(&overlayHostAddress, "overlay-host-address", "", "Host address for overlay network") disableTracer := flag.Bool("disable-tracer", false, "Disable connection tracer") + overlayEnable := flag.Bool("overlay-enable", false, "Enable overlay network") debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") @@ -114,6 +119,19 @@ func main() { b4nsdDriver.DisableTracer = *disableTracer } + if *overlayEnable { + if overlayEtcd == "" { + logrus.Fatal("--overlay-etcd is not specified") + } + if overlayHostAddress == "" { + logrus.Fatal("--overlay-host-address is not specified") + } + b4nsdDriver.OverlayEnable = *overlayEnable + b4nsdDriver.OverlayEtcd = overlayEtcd + b4nsdDriver.OverlayHostAddress = overlayHostAddress + logrus.Infof("Overlay network is enabled. etcd address is %q host address is %q", b4nsdDriver.OverlayEtcd, b4nsdDriver.OverlayHostAddress) + } + waitChan := make(chan bool) go func() { err = listenServeNerdctlAPI(socketFile, &router.Backend{ diff --git a/go.mod b/go.mod index dd4ccd3..6fdb4a7 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,13 @@ require ( ) require ( + github.com/coreos/etcd v3.3.27+incompatible // indirect + github.com/coreos/go-semver v0.3.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + go.etcd.io/etcd v3.3.27+incompatible // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c12d135..bfa3afd 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,19 @@ +github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA= +github.com/coreos/etcd 
v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/opencontainers/runtime-spec v1.0.3-0.20220809190508-9ee22abf867e h1:UcO9GCY5ehlR0PK20BXQ+nlb8J2LNXlwm97PryxbIek= github.com/opencontainers/runtime-spec v1.0.3-0.20220809190508-9ee22abf867e/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/oraoto/go-pidfd v0.1.2-0.20210402155345-46bf1ba22e22 h1:TBw1Dwr/0eRvVIhdgQ+qGQuJ2STNL1+bjaI7nKLCoiQ= @@ -17,12 +28,15 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/vtolstov/go-ioctl v0.0.0-20151206205506-6be9cced4810 h1:X6ps8XHfpQjw8dUStzlMi2ybiKQ2Fmdw7UM+TinwvyM= github.com/vtolstov/go-ioctl v0.0.0-20151206205506-6be9cced4810/go.mod h1:dF0BBJ2YrV1+2eAIyEI+KeSidgA6HqoIP1u5XTlMq/o= +go.etcd.io/etcd v3.3.27+incompatible h1:5hMrpf6REqTHV2LW2OclNpRtxI0k9ZplMemJsMSWju0= +go.etcd.io/etcd v3.3.27+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220818161305-2296e01440c6 h1:Sx/u41w+OwrInGdEckYmEuU5gHoGSL4QbDz3S9s6j4U= golang.org/x/sys v0.0.0-20220818161305-2296e01440c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index f250064..2867cce 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -19,6 +19,7 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" + "go.etcd.io/etcd/client" "golang.org/x/sys/unix" ) @@ -450,6 +451,15 @@ func (h *Handler) SetReadyFd(fd int) error { return nil } +type OverlayConfig struct { + Enable bool + EtcdAddress string + HostAddress string + etcdClientConfig client.Config + etcdClient client.Client + etcdKeyApi client.KeysAPI +} + type notifHandler struct { fd libseccomp.ScmpFd state 
*specs.ContainerProcessState @@ -464,6 +474,7 @@ type notifHandler struct { containerInterfaces map[string]containerInterface tracer *tracer.Tracer disableTracer bool + overlay *OverlayConfig } type containerInterface struct { @@ -492,7 +503,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState } // StartHandle starts seccomp notif handler -func (h *Handler) StartHandle(disableTracer bool) { +func (h *Handler) StartHandle(disableTracer bool, overlayConfig *OverlayConfig) { logrus.Info("Waiting for seccomp file descriptors") l, err := net.Listen("unix", h.socketPath) if err != nil { @@ -532,9 +543,22 @@ func (h *Handler) StartHandle(disableTracer bool) { logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) notifHandler.disableTracer = disableTracer + notifHandler.overlay = overlayConfig + if notifHandler.overlay.Enable { + notifHandler.overlay.etcdClientConfig = client.Config{ + Endpoints: []string{notifHandler.overlay.EtcdAddress}, + Transport: client.DefaultTransport, + HeaderTimeoutPerRequest: 2 * time.Second, + } + notifHandler.overlay.etcdClient, err = client.New(notifHandler.overlay.etcdClientConfig) + if err != nil { + logrus.WithError(err).Fatal("failed to create etcd client") + } + notifHandler.overlay.etcdKeyApi = client.NewKeysAPI(notifHandler.overlay.etcdClient) + } // prepare tracer agent - if !notifHandler.disableTracer { + if !notifHandler.disableTracer && !notifHandler.overlay.Enable { err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) if err != nil { logrus.WithError(err).Fatalf("failed to start tracer") @@ -569,7 +593,11 @@ func (h *Handler) StartHandle(disableTracer bool) { } go notifHandler.handle() - go notifHandler.startBackgroundTask(h.comSocketPath) + if notifHandler.overlay.Enable { + go notifHandler.startBackgroundTaskForOverlayNetwork() + } else { + go notifHandler.startBackgroundTask(h.comSocketPath) + } } } @@ -656,3 +684,41 @@ func (h 
*notifHandler) startBackgroundTask(comSocketPath string) { time.Sleep(1 * time.Second) } } + +func (h *notifHandler) startBackgroundTaskForOverlayNetwork() { + ifLastUpdateUnix := int64(0) + for { + lastUpdated := h.nonBypassable.GetLastUpdateUnix() + if lastUpdated > ifLastUpdateUnix || ifLastUpdateUnix+10 < time.Now().Unix() { + ifs := h.nonBypassable.GetInterfaces() + for _, intf := range ifs { + if intf.IsLoopback { + continue + } + for _, addr := range intf.Addresses { + // ignore IPv6 address + if addr.IP.To4() == nil { + continue + } + for _, v := range h.forwardingPorts { + containerAddr := fmt.Sprintf("%s:%d", addr.IP, v.ChildPort) + hostAddr := fmt.Sprintf("%s:%d", h.overlay.HostAddress, v.HostPort) + // Remove entries with timeout + // TODO: Remove related entries when exiting. + opts := &client.SetOptions{ + TTL: time.Second * 15, + } + _, err := h.overlay.etcdKeyApi.Set(gocontext.TODO(), containerAddr, hostAddr, opts) + if err != nil { + logrus.WithError(err).Errorf("failed to register %s -> %s", containerAddr, hostAddr) + } else { + logrus.Infof("Registered %s -> %s", containerAddr, hostAddr) + } + } + } + } + ifLastUpdateUnix = time.Now().Unix() + } + time.Sleep(1 * time.Second) + } +} diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index ac1d00d..b944a2f 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -1,9 +1,12 @@ package bypass4netns import ( + gocontext "context" "encoding/binary" "fmt" "net" + "strconv" + "strings" "syscall" "unsafe" @@ -143,6 +146,22 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } ss.addr = destAddr + var newDestAddr net.IP + switch destAddr.Family { + case syscall.AF_INET: + newDestAddr = net.IPv4zero + newDestAddr = newDestAddr.To4() + newDestAddr[0] = 127 + newDestAddr[3] = 1 + case syscall.AF_INET6: + newDestAddr = net.IPv6loopback + newDestAddr = newDestAddr.To16() + default: + ss.logger.Errorf("unexpected destination address family %d", 
destAddr.Family) + ss.state = Error + return + } + // check whether the destination is bypassed or not. connectToLoopback := false connectToInterface := false @@ -158,11 +177,38 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } } - contIf, ok := handler.containerInterfaces[destAddr.String()] - if ok { - ss.logger.Infof("destination address %v is container address and bypassed", destAddr) - fwdPort.HostPort = contIf.hostPort - connectToOtherBypassedContainer = true + if handler.overlay.Enable && destAddr.IP.IsPrivate() { + // currently, only private addresses are available in overlay network. + res, err := handler.overlay.etcdKeyApi.Get(gocontext.TODO(), destAddr.String(), nil) + if err != nil { + ss.logger.Warnf("destination address %q is not registered", destAddr.String()) + } else { + hostAddrWithPort := res.Node.Value + hostAddrs := strings.Split(hostAddrWithPort, ":") + if len(hostAddrs) != 2 { + ss.logger.Errorf("invalid address format %q", hostAddrWithPort) + ss.state = Error + return + } + hostAddr := hostAddrs[0] + hostPort, err := strconv.Atoi(hostAddrs[1]) + if err != nil { + ss.logger.Errorf("invalid address format %q", hostAddrWithPort) + ss.state = Error + return + } + newDestAddr = net.ParseIP(hostAddr) + fwdPort.HostPort = hostPort + connectToOtherBypassedContainer = true + ss.logger.Infof("destination address %v is container address and bypassed via overlay network", destAddr) + } + } else { + contIf, ok := handler.containerInterfaces[destAddr.String()] + if ok { + ss.logger.Infof("destination address %v is container address and bypassed", destAddr) + fwdPort.HostPort = contIf.hostPort + connectToOtherBypassedContainer = true + } } // check whether the destination container socket is bypassed or not. 
@@ -219,21 +265,15 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } if connectToInterface || connectToOtherBypassedContainer { - var addr net.IP // writing host's loopback address to connect to bypassed socket at sock_addr's address offset // TODO: should we return dummy value when getpeername(2) is called? switch destAddr.Family { case syscall.AF_INET: - // create loopback address "127.0.0.1" - addr = net.IPv4zero - addr = addr.To4() - addr[0] = 127 - addr[3] = 1 - err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, addr[0:4]) + newDestAddr = newDestAddr.To4() + err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, newDestAddr[0:4]) case syscall.AF_INET6: - addr = net.IPv6loopback - addr = addr.To16() - err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, addr[0:16]) + newDestAddr = newDestAddr.To16() + err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, newDestAddr[0:16]) default: ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) ss.state = Error @@ -245,7 +285,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { return } - ss.logger.Infof("destination address %s is rewritten to host loopback address %s", destAddr.IP, addr) + ss.logger.Infof("destination address %s is rewritten to %s", destAddr.IP, newDestAddr) } ss.state = Bypassed diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index eb88cd1..67b0143 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -24,6 +24,9 @@ type Driver struct { containerInterfaces map[string]com.ContainerInterfaces interfacesLock sync.RWMutex DisableTracer bool + OverlayEnable bool + OverlayEtcd string + OverlayHostAddress string } func NewDriver(execPath string, comSocketPath string) *Driver { @@ -35,6 +38,8 @@ func NewDriver(execPath string, comSocketPath string) *Driver { containerInterfaces: map[string]com.ContainerInterfaces{}, interfacesLock: sync.RWMutex{}, 
DisableTracer: false, + OverlayEnable: false, + OverlayEtcd: "", } } @@ -87,6 +92,12 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { b4nnArgs = append(b4nnArgs, "--disable-tracer=true") } + if d.OverlayEnable { + b4nnArgs = append(b4nnArgs, "--overlay-enable=true") + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--overlay-etcd=%s", d.OverlayEtcd)) + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--overlay-host-address=%s", d.OverlayHostAddress)) + } + // prepare pipe for ready notification readyR, readyW, err := os.Pipe() if err != nil { From 2abf918bb920f90bd36ad415fa47e1ac346fd14f Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 21 Nov 2023 04:54:51 +0000 Subject: [PATCH 12/55] improved based on comments Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 58 ++++++++++++++++-------------- cmd/bypass4netnsd/main.go | 46 ++++++++++++------------ go.mod | 1 - pkg/bypass4netns/bypass4netns.go | 50 +++++++++++++------------- pkg/bypass4netns/socket.go | 6 ++-- pkg/bypass4netns/tracer/tracer.go | 2 +- pkg/bypass4netnsd/bypass4netnsd.go | 25 +++++++------ 7 files changed, 97 insertions(+), 91 deletions(-) diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index 227eaf4..55ee296 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -23,14 +23,14 @@ import ( ) var ( - socketFile string - comSocketFile string - pidFile string - logFilePath string - overlayEtcd string - overlayHostAddress string - readyFd int - exitFd int + socketFile string + comSocketFile string + pidFile string + logFilePath string + multinodeEtcdAddress string + multinodeHostAddress string + readyFd int + exitFd int ) func main() { @@ -44,8 +44,8 @@ func main() { flag.StringVar(&comSocketFile, "com-socket", filepath.Join(xdgRuntimeDir, "bypass4netnsd-com.sock"), "Socket file for communication with bypass4netns") flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to 
file") - flag.StringVar(&overlayEtcd, "overlay-etcd", "", "Etcd address for overlay network") - flag.StringVar(&overlayHostAddress, "overlay-host-address", "", "Host address for overlay network") + flag.StringVar(&multinodeEtcdAddress, "multinode-etcd-address", "", "Etcd address for multinode communication") + flag.StringVar(&multinodeHostAddress, "multinode-host-address", "", "Host address for multinode communication") flag.IntVar(&readyFd, "ready-fd", -1, "File descriptor to notify when ready") flag.IntVar(&exitFd, "exit-fd", -1, "File descriptor for terminating bypass4netns") ignoredSubnets := flag.StringSlice("ignore", []string{"127.0.0.0/8"}, "Subnets to ignore in bypass4netns. Can be also set to \"auto\".") @@ -53,10 +53,10 @@ func main() { debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") - nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide - tracerFlag := flag.Bool("tracer", false, "(An internal flag. Do not use manually.)") // TODO: hide - disableTracerFlag := flag.Bool("disable-tracer", false, "disable connection tracer") - overlayEnable := flag.Bool("overlay-enable", false, "Enable overlay network") + nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide + tracerAgentFlag := flag.Bool("tracer-agent", false, "(An internal flag. 
Do not use manually.)") // TODO: hide + tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") + multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") // Parse arguments flag.Parse() @@ -101,21 +101,27 @@ func main() { os.Exit(0) } - if *tracerFlag { + if *tracerAgentFlag { if err := tracer.Main(); err != nil { logrus.Fatal(err) } os.Exit(0) } - if *overlayEnable { - if overlayEtcd == "" { - logrus.Fatal("--overlay-etcd is not specified") + if *tracerEnable { + if comSocketFile == "" { + logrus.Fatal("--com-socket is not specified") } - if overlayHostAddress == "" { - logrus.Fatal("--overlay-host-address is not specified") + } + + if *multinodeEnable { + if multinodeEtcdAddress == "" { + logrus.Fatal("--multinode-etcd-address is not specified") + } + if multinodeHostAddress == "" { + logrus.Fatal("--multinode-host-address is not specified") } - logrus.Infof("Overlay network is enabled. Etcd address is %q", overlayEtcd) + logrus.WithFields(logrus.Fields{"etcdAddress": multinodeEtcdAddress, "hostAddress": multinodeHostAddress}).Infof("Multinode communication is enabled.") } if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) { @@ -223,10 +229,10 @@ func main() { os.Exit(0) }() - overlay := &bypass4netns.OverlayConfig{ - Enable: *overlayEnable, - EtcdAddress: overlayEtcd, - HostAddress: overlayHostAddress, + multinode := &bypass4netns.MultinodeConfig{ + Enable: *multinodeEnable, + EtcdAddress: multinodeEtcdAddress, + HostAddress: multinodeHostAddress, } - handler.StartHandle(*disableTracerFlag, overlay) + handler.StartHandle(*tracerEnable, multinode) } diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index 9aeb5c4..708cf10 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -21,13 +21,13 @@ import ( ) var ( - socketFile string - comSocketFile string // socket for channel with bypass4netns - pidFile string - logFilePath string - b4nnPath string - overlayEtcd 
string - overlayHostAddress string + socketFile string + comSocketFile string // socket for channel with bypass4netns + pidFile string + logFilePath string + b4nnPath string + multinodeEtcdAddress string + multinodeHostAddress string ) func main() { @@ -47,10 +47,10 @@ func main() { flag.StringVar(&pidFile, "pid-file", "", "Pid file") flag.StringVar(&logFilePath, "log-file", "", "Output logs to file") flag.StringVar(&b4nnPath, "b4nn-executable", defaultB4nnPath, "Path to bypass4netns executable") - flag.StringVar(&overlayEtcd, "overlay-etcd", "", "Etcd address for overlay network") - flag.StringVar(&overlayHostAddress, "overlay-host-address", "", "Host address for overlay network") - disableTracer := flag.Bool("disable-tracer", false, "Disable connection tracer") - overlayEnable := flag.Bool("overlay-enable", false, "Enable overlay network") + flag.StringVar(&multinodeEtcdAddress, "multinode-etcd-address", "", "Etcd address for multinode communication") + flag.StringVar(&multinodeHostAddress, "multinode-host-address", "", "Host address for multinode communication") + tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") + multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") help := flag.Bool("help", false, "Show help") @@ -114,22 +114,22 @@ func main() { b4nsdDriver := bypass4netnsd.NewDriver(b4nnPath, comSocketFile) - if *disableTracer { - logrus.Info("Connection tracer is disabled") - b4nsdDriver.DisableTracer = *disableTracer + if *tracerEnable { + logrus.Info("Connection tracer is enabled") + b4nsdDriver.TracerEnable = *tracerEnable } - if *overlayEnable { - if overlayEtcd == "" { - logrus.Fatal("--overlay-etcd is not specified") + if *multinodeEnable { + if multinodeEtcdAddress == "" { + logrus.Fatal("--multinode-etcd-address is not specified") } - if overlayHostAddress == "" { - 
logrus.Fatal("--overlay-host-address is not specified") + if multinodeHostAddress == "" { + logrus.Fatal("--multinode-host-address is not specified") } - b4nsdDriver.OverlayEnable = *overlayEnable - b4nsdDriver.OverlayEtcd = overlayEtcd - b4nsdDriver.OverlayHostAddress = overlayHostAddress - logrus.Infof("Overlay network is enabled. etcd address is %q host address is %q", b4nsdDriver.OverlayEtcd, b4nsdDriver.OverlayHostAddress) + b4nsdDriver.MultinodeEnable = *multinodeEnable + b4nsdDriver.MultinodeEtcdAddress = multinodeEtcdAddress + b4nsdDriver.MultinodeHostAddress = multinodeHostAddress + logrus.WithFields(logrus.Fields{"etcdAddress": multinodeEtcdAddress, "hostAddress": multinodeHostAddress}).Info("Multinode communication is enabled.") } waitChan := make(chan bool) diff --git a/go.mod b/go.mod index 6fdb4a7..8df6a9d 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,5 @@ require ( github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - go.etcd.io/etcd v3.3.27+incompatible // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 2867cce..b43016f 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -12,6 +12,7 @@ import ( "syscall" "time" + "github.com/coreos/etcd/client" "github.com/opencontainers/runtime-spec/specs-go" "github.com/oraoto/go-pidfd" "github.com/rootless-containers/bypass4netns/pkg/api/com" @@ -19,10 +20,11 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" - "go.etcd.io/etcd/client" "golang.org/x/sys/unix" ) +const ETCD_MULTINODE_PREFIX = "bypass4netns/multinode/" + func closeStateFds(recvFds []int) { for i := range recvFds { unix.Close(i) @@ -451,7 +453,7 @@ func (h *Handler) SetReadyFd(fd int) error { return 
nil } -type OverlayConfig struct { +type MultinodeConfig struct { Enable bool EtcdAddress string HostAddress string @@ -473,8 +475,8 @@ type notifHandler struct { // key is destination address e.g. "192.168.1.1:1000" containerInterfaces map[string]containerInterface tracer *tracer.Tracer - disableTracer bool - overlay *OverlayConfig + tracerEnable bool + multinode *MultinodeConfig } type containerInterface struct { @@ -503,7 +505,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState } // StartHandle starts seccomp notif handler -func (h *Handler) StartHandle(disableTracer bool, overlayConfig *OverlayConfig) { +func (h *Handler) StartHandle(enableTracer bool, multinodeConfig *MultinodeConfig) { logrus.Info("Waiting for seccomp file descriptors") l, err := net.Listen("unix", h.socketPath) if err != nil { @@ -542,23 +544,23 @@ func (h *Handler) StartHandle(disableTracer bool, overlayConfig *OverlayConfig) logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) - notifHandler.disableTracer = disableTracer - notifHandler.overlay = overlayConfig - if notifHandler.overlay.Enable { - notifHandler.overlay.etcdClientConfig = client.Config{ - Endpoints: []string{notifHandler.overlay.EtcdAddress}, + notifHandler.tracerEnable = enableTracer + notifHandler.multinode = multinodeConfig + if notifHandler.multinode.Enable { + notifHandler.multinode.etcdClientConfig = client.Config{ + Endpoints: []string{notifHandler.multinode.EtcdAddress}, Transport: client.DefaultTransport, HeaderTimeoutPerRequest: 2 * time.Second, } - notifHandler.overlay.etcdClient, err = client.New(notifHandler.overlay.etcdClientConfig) + notifHandler.multinode.etcdClient, err = client.New(notifHandler.multinode.etcdClientConfig) if err != nil { logrus.WithError(err).Fatal("failed to create etcd client") } - notifHandler.overlay.etcdKeyApi = client.NewKeysAPI(notifHandler.overlay.etcdClient) + notifHandler.multinode.etcdKeyApi = 
client.NewKeysAPI(notifHandler.multinode.etcdClient) } // prepare tracer agent - if !notifHandler.disableTracer && !notifHandler.overlay.Enable { + if notifHandler.tracerEnable && !notifHandler.multinode.Enable { err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) if err != nil { logrus.WithError(err).Fatalf("failed to start tracer") @@ -592,16 +594,16 @@ func (h *Handler) StartHandle(disableTracer bool, overlayConfig *OverlayConfig) logrus.Infof("tracer is disabled") } - go notifHandler.handle() - if notifHandler.overlay.Enable { - go notifHandler.startBackgroundTaskForOverlayNetwork() - } else { - go notifHandler.startBackgroundTask(h.comSocketPath) + if notifHandler.multinode.Enable { + go notifHandler.startBackgroundMultinodeTask() + } else if notifHandler.tracerEnable { + go notifHandler.startBackgroundTracerTask(h.comSocketPath) } + go notifHandler.handle() } } -func (h *notifHandler) startBackgroundTask(comSocketPath string) { +func (h *notifHandler) startBackgroundTracerTask(comSocketPath string) { logrus.Info("Started bypass4netns background task") comClient, err := com.NewComClient(comSocketPath) if err != nil { @@ -657,7 +659,7 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { containerIf[dstAddr] = contIf continue } - if !h.disableTracer { + if h.tracerEnable { addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) if err != nil { logrus.WithError(err).Debugf("failed to connect to %s", dstAddr) @@ -667,8 +669,8 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { logrus.Debugf("failed to connect to %s", dstAddr) continue } + logrus.Debugf("successfully connected to %s", dstAddr) } - logrus.Debugf("successfully connected to %s", dstAddr) containerIf[dstAddr] = containerInterface{ containerID: cont.ContainerID, hostPort: hostPort, @@ -685,7 +687,7 @@ func (h *notifHandler) startBackgroundTask(comSocketPath string) { } } -func (h *notifHandler) startBackgroundTaskForOverlayNetwork() { +func (h 
*notifHandler) startBackgroundMultinodeTask() { ifLastUpdateUnix := int64(0) for { lastUpdated := h.nonBypassable.GetLastUpdateUnix() @@ -702,13 +704,13 @@ func (h *notifHandler) startBackgroundTaskForOverlayNetwork() { } for _, v := range h.forwardingPorts { containerAddr := fmt.Sprintf("%s:%d", addr.IP, v.ChildPort) - hostAddr := fmt.Sprintf("%s:%d", h.overlay.HostAddress, v.HostPort) + hostAddr := fmt.Sprintf("%s:%d", h.multinode.HostAddress, v.HostPort) // Remove entries with timeout // TODO: Remove related entries when exiting. opts := &client.SetOptions{ TTL: time.Second * 15, } - _, err := h.overlay.etcdKeyApi.Set(gocontext.TODO(), containerAddr, hostAddr, opts) + _, err := h.multinode.etcdKeyApi.Set(gocontext.TODO(), ETCD_MULTINODE_PREFIX+containerAddr, hostAddr, opts) if err != nil { logrus.WithError(err).Errorf("failed to register %s -> %s", containerAddr, hostAddr) } else { diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index b944a2f..7619137 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -177,9 +177,9 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } } - if handler.overlay.Enable && destAddr.IP.IsPrivate() { - // currently, only private addresses are available in overlay network. - res, err := handler.overlay.etcdKeyApi.Get(gocontext.TODO(), destAddr.String(), nil) + if handler.multinode.Enable && destAddr.IP.IsPrivate() { + // currently, only private addresses are available in multinode communication. 
+ res, err := handler.multinode.etcdKeyApi.Get(gocontext.TODO(), ETCD_MULTINODE_PREFIX+destAddr.String(), nil) if err != nil { ss.logger.Warnf("destination address %q is not registered", destAddr.String()) } else { diff --git a/pkg/bypass4netns/tracer/tracer.go b/pkg/bypass4netns/tracer/tracer.go index 164dd46..b40f1dd 100644 --- a/pkg/bypass4netns/tracer/tracer.go +++ b/pkg/bypass4netns/tracer/tracer.go @@ -49,7 +49,7 @@ func (x *Tracer) StartTracer(ctx context.Context, pid int) error { if !ok { nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") } - nsenterFlags = append(nsenterFlags, "--", selfExe, "--tracer", "--log-file", x.logPath) + nsenterFlags = append(nsenterFlags, "--", selfExe, "--tracer-agent", "--log-file", x.logPath) x.tracerCmd = exec.CommandContext(ctx, nsenter, nsenterFlags...) x.tracerCmd.SysProcAttr = &unix.SysProcAttr{ Pdeathsig: unix.SIGTERM, diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index 67b0143..66ce2c2 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -23,10 +23,10 @@ type Driver struct { lock sync.RWMutex containerInterfaces map[string]com.ContainerInterfaces interfacesLock sync.RWMutex - DisableTracer bool - OverlayEnable bool - OverlayEtcd string - OverlayHostAddress string + TracerEnable bool + MultinodeEnable bool + MultinodeEtcdAddress string + MultinodeHostAddress string } func NewDriver(execPath string, comSocketPath string) *Driver { @@ -37,9 +37,8 @@ func NewDriver(execPath string, comSocketPath string) *Driver { lock: sync.RWMutex{}, containerInterfaces: map[string]com.ContainerInterfaces{}, interfacesLock: sync.RWMutex{}, - DisableTracer: false, - OverlayEnable: false, - OverlayEtcd: "", + TracerEnable: false, + MultinodeEnable: false, } } @@ -88,14 +87,14 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { } b4nnArgs = append(b4nnArgs, fmt.Sprintf("--com-socket=%s", d.ComSocketPath)) - if 
d.DisableTracer { - b4nnArgs = append(b4nnArgs, "--disable-tracer=true") + if d.TracerEnable { + b4nnArgs = append(b4nnArgs, "--tracer=true") } - if d.OverlayEnable { - b4nnArgs = append(b4nnArgs, "--overlay-enable=true") - b4nnArgs = append(b4nnArgs, fmt.Sprintf("--overlay-etcd=%s", d.OverlayEtcd)) - b4nnArgs = append(b4nnArgs, fmt.Sprintf("--overlay-host-address=%s", d.OverlayHostAddress)) + if d.MultinodeEnable { + b4nnArgs = append(b4nnArgs, "--multinode=true") + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--multinode-etcd-address=%s", d.MultinodeEtcdAddress)) + b4nnArgs = append(b4nnArgs, fmt.Sprintf("--multinode-host-address=%s", d.MultinodeHostAddress)) } // prepare pipe for ready notification From a4b692cd2e92f4a95a11480ab43135c49fbf3bbe Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 21 Nov 2023 07:39:09 +0000 Subject: [PATCH 13/55] fix tests Signed-off-by: Naoki MATSUMOTO --- Vagrantfile | 24 ++++++++-- test/seccomp.json.sh | 8 ++-- test/test.sh | 112 ++++++++++++++++++++++--------------------- 3 files changed, 81 insertions(+), 63 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index b940cd2..217a812 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -19,18 +19,31 @@ Vagrant.configure("2") do |config| #!/bin/bash set -eu -o pipefail - NERDCTL_VERSION="0.22.2" + GO_VERSION="1.21.4" + NERDCTL_VERSION="1.7.0" ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" echo "===== Prepare =====" ( set -x sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap golang python3 + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config systemctl --user start dbus - curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxzv /usr/local + curl -fsSL 
https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | sudo tar Cxz /usr/local + echo "export PATH=$PATH:/usr/local/go/bin" >> ~/.profile + source ~/.profile + + curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxz /usr/local containerd-rootless-setuptool.sh install containerd-rootless-setuptool.sh install-buildkit + + # build nerdctl with bypass4netns + curl -fsSL https://github.com/containerd/nerdctl/archive/refs/tags/v${NERDCTL_VERSION}.tar.gz | tar Cxz ~/ + cd ~/nerdctl-${NERDCTL_VERSION} + echo "replace github.com/rootless-containers/bypass4netns => /vagrant" >> go.mod + make + sudo rm -f /usr/local/bin/nerdctl + sudo cp _output/nerdctl /usr/local/bin/nerdctl nerdctl info nerdctl pull --quiet "${ALPINE_IMAGE}" @@ -45,7 +58,7 @@ Vagrant.configure("2") do |config| systemd-run --user --unit run-iperf3 iperf3 -s ) - echo "===== `--ignore` option test =====" + echo "===== '--ignore' option test =====" ( set -x systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug @@ -53,7 +66,7 @@ Vagrant.configure("2") do |config| nerdctl exec test apk add --no-cache iperf3 nerdctl exec test iperf3 -c $(cat /tmp/host_ip) # TODO: this check is dirty. we want better method to check the connect(2) is ignored. - journalctl --user -u run-bypass4netns.service | grep "is ignored, skipping." 
+ journalctl --user -u run-bypass4netns.service | grep "is not bypassed" nerdctl rm -f test systemctl --user stop run-bypass4netns.service @@ -71,6 +84,7 @@ Vagrant.configure("2") do |config| echo "===== Test bypass4netnsd =====" ( set -x + source ~/.profile /vagrant/test/test_b4nnd.sh ) diff --git a/test/seccomp.json.sh b/test/seccomp.json.sh index efa7f85..df6596d 100755 --- a/test/seccomp.json.sh +++ b/test/seccomp.json.sh @@ -22,9 +22,11 @@ cat < /dev/null & nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 --count 2 &> /dev/null & nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 +## NOTICE ## +# currently, bypass4netns supports only TCP. Tests for udp connections are disabled. # test_connect udp -python3 test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 +#python3 test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_connect.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_connect.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 # check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test connect over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test connect over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi echo "test_connect done." -echo "test_sendto starting..." 
-# test_sendto tcp -python3 test_sendto.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 - -# test_sendto udp -python3 test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 - -# check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test sendto over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi -echo "test_sendto done." - -echo "test_sendmsg starting..." -# test_sendmsg tcp -python3 test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 - -# test_sendmsg udp -python3 test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_host & -nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & -nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 -sleep 5 - -# check server is not timedout -RESULT=`cat /tmp/test_host /tmp/test_test2` -if [[ "$RESULT" == *timeout* ]]; then - echo "test sendto over udp failed" - cat /tmp/test_host - cat /tmp/test_test2 - exit 1 -fi -echo "test_sendmsg done." +#echo "test_sendto starting..." 
+## test_sendto tcp +#python3 test_sendto.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 +# +## test_sendto udp +#python3 test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendto.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendto.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 +# +## check server is not timedout +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test sendto over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi +#echo "test_sendto done." +# +#echo "test_sendmsg starting..." +## test_sendmsg tcp +#python3 test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 --count 2 &> /dev/null & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP --count 2 +# +## test_sendmsg udp +#python3 test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_host & +#nerdctl exec $TEST_CONTAINER_2 python3 /tmp/test_sendmsg.py -s -p 8888 -u --count 2 &> /tmp/test_test2 & +#nerdctl exec $TEST_CONTAINER_1 python3 /tmp/test_sendmsg.py -c -p 8888 --host-ip $HOST_IP --netns-ip $NETNS_IP -u --count 2 +#sleep 5 +# +## check server is not timedout +#RESULT=`cat /tmp/test_host /tmp/test_test2` +#if [[ "$RESULT" == *timeout* ]]; then +# echo "test sendto over udp failed" +# cat /tmp/test_host +# cat /tmp/test_test2 +# exit 1 +#fi +#echo "test_sendmsg done." 
nerdctl rm -f $TEST_CONTAINER_2 nerdctl rm -f $TEST_CONTAINER_1 -rm /tmp/test_host /tmp/test_test2 +# rm /tmp/test_host /tmp/test_test2 From 796e50b80685cc004ec75755d4916efd0f3b17e0 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 21 Nov 2023 07:39:48 +0000 Subject: [PATCH 14/55] FIXME: add current branch for CI Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c299609..db6a51f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -3,6 +3,7 @@ on: push: branches: - master + - ng-b4ns - 'release/**' pull_request: jobs: From dea596a9a05dd171b691ce04a7336deb1867d086 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 21 Nov 2023 07:51:36 +0000 Subject: [PATCH 15/55] fix lint Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/sockaddr.go | 10 ++++++++-- pkg/bypass4netns/socket.go | 5 ++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/bypass4netns/sockaddr.go b/pkg/bypass4netns/sockaddr.go index 2837afc..4cc9da7 100644 --- a/pkg/bypass4netns/sockaddr.go +++ b/pkg/bypass4netns/sockaddr.go @@ -79,7 +79,10 @@ func (sa *sockaddr) toBytes() ([]byte, error) { copy(addr4.Addr[:], sa.IP.To4()[:]) addr4.Port = endian.Uint16(p) - binary.Write(&res, endian, addr4) + err := binary.Write(&res, endian, addr4) + if err != nil { + return nil, err + } case syscall.AF_INET6: addr6 := syscall.RawSockaddrInet6{} addr6.Family = syscall.AF_INET6 @@ -88,7 +91,10 @@ func (sa *sockaddr) toBytes() ([]byte, error) { addr6.Port = endian.Uint16(p) addr6.Flowinfo = sa.Flowinfo addr6.Scope_id = sa.ScopeID - binary.Write(&res, endian, addr6) + err := binary.Write(&res, endian, addr6) + if err != nil { + return nil, err + } default: return nil, fmt.Errorf("expected AF_INET or AF_INET6, got %d", sa.Family) } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 7619137..b76e640 100644 --- 
a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -97,14 +97,14 @@ func newSocketStatus(pid uint32, sockfd int, sockDomain, sockType, sockProto int } } -func (ss *socketStatus) handleSysSetsockopt(ctx *context) error { +func (ss *socketStatus) handleSysSetsockopt(ctx *context) { ss.logger.Debug("handle setsockopt") level := ctx.req.Data.Args[1] optname := ctx.req.Data.Args[2] optlen := ctx.req.Data.Args[4] optval, err := readProcMem(ctx.req.Pid, ctx.req.Data.Args[3], optlen) if err != nil { - return fmt.Errorf("readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) + ss.logger.Errorf("setsockopt readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) } value := socketOption{ @@ -116,7 +116,6 @@ func (ss *socketStatus) handleSysSetsockopt(ctx *context) error { ss.socketOptions = append(ss.socketOptions, value) ss.logger.Debugf("setsockopt level=%d optname=%d optval=%v optlen=%d was recorded.", level, optname, optval, optlen) - return nil } func (ss *socketStatus) handleSysFcntl(ctx *context) { From b326eecc9089f3d9b6184df11942e7833a9334f6 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 21 Nov 2023 17:07:00 +0000 Subject: [PATCH 16/55] run tests in lxc container Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 40 +++++---- Vagrantfile | 136 ----------------------------- launch_test_lxc.sh | 10 +++ test/Dockerfile | 2 +- test/init_test.sh | 66 ++++++++++++++ test/lxd.yaml | 38 ++++++++ test/run_test.sh | 97 ++++++++++++++++++++ test/{test.sh => test_syscalls.sh} | 0 8 files changed, 237 insertions(+), 152 deletions(-) delete mode 100644 Vagrantfile create mode 100755 launch_test_lxc.sh create mode 100755 test/init_test.sh create mode 100644 test/lxd.yaml create mode 100755 test/run_test.sh rename test/{test.sh => test_syscalls.sh} (100%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index db6a51f..1acf70c 100644 --- a/.github/workflows/test.yaml 
+++ b/.github/workflows/test.yaml @@ -1,11 +1,12 @@ +--- name: Test on: push: branches: - master - ng-b4ns - - 'release/**' - pull_request: + - release/** + pull_request: null jobs: golangci-lint: runs-on: ubuntu-22.04 @@ -23,18 +24,27 @@ jobs: with: version: v1.49.0 args: --verbose - ubuntu-2204: - name: "Ubuntu 22.04" - # nested virtualization is only available on macOS hosts - runs-on: macos-12 - timeout-minutes: 30 + ubuntu-2204-on-lxc: + name: Ubuntu 22.04 on LXC + runs-on: ubuntu-22.04 + timeout-minutes: 20 steps: - - uses: actions/checkout@v3 - - - name: "Cache ~/.vagrant.d/boxes" - uses: actions/cache@v3 - with: - path: ~/.vagrant.d/boxes - key: vagrant-${{ hashFiles('Vagrantfile*') }} + - uses: actions/checkout@v3.0.2 + #- name: install lxd (v5.19) + # run: sudo snap remove --purge lxd && sudo snap install lxd --revision=26093 + - name: configure lxd + run: cat test/lxd.yaml | sudo lxd init --preseed && sudo sysctl -w net.ipv4.ip_forward=1 + # thanks to https://andreas.scherbaum.la/post/2023-01-18_fix-lxc-network-issues-in-ubuntu-22.04/ + - name: Disable Docker Firewall 1 + run: sudo iptables -I DOCKER-USER -i lxdbr0 -o eth0 -j ACCEPT + - name: Disable Docker Firewall 2 + run: sudo iptables -I DOCKER-USER -o lxdbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + - name: debug + run: sudo lxc network list && sudo iptables -t nat -L && sudo ufw status && sudo nft list table inet lxd + - name: launch lxc container + run: ./launch_test_lxc.sh + - name: install dependencies and build + run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh + - name: run tests + run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/test/run_test.sh - - run: vagrant up diff --git a/Vagrantfile b/Vagrantfile deleted file mode 100644 index 217a812..0000000 --- a/Vagrantfile +++ /dev/null @@ -1,136 +0,0 @@ -# -*- mode: ruby -*- -# vi: set ft=ruby : - -Vagrant.configure("2") do |config| - config.vm.box = "ubuntu/jammy64" - 
memory = 4096 - cpus = 2 - config.vm.provider :virtualbox do |v| - v.memory = memory - v.cpus = cpus - # Avoid 10.0.0.0/8 and 172.0.0.0/8: https://github.com/rootless-containers/bypass4netns/pull/5#issuecomment-1026602768 - v.customize ["modifyvm", :id, "--natnet1", "192.168.6.0/24"] - end - config.vm.provider :libvirt do |v| - v.memory = memory - v.cpus = cpus - end - config.vm.provision "shell", privileged: false, inline: <<~SHELL - #!/bin/bash - set -eu -o pipefail - - GO_VERSION="1.21.4" - NERDCTL_VERSION="1.7.0" - ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" - echo "===== Prepare =====" - ( - set -x - sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config - systemctl --user start dbus - - curl -fsSL https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | sudo tar Cxz /usr/local - echo "export PATH=$PATH:/usr/local/go/bin" >> ~/.profile - source ~/.profile - - curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxz /usr/local - containerd-rootless-setuptool.sh install - containerd-rootless-setuptool.sh install-buildkit - - # build nerdctl with bypass4netns - curl -fsSL https://github.com/containerd/nerdctl/archive/refs/tags/v${NERDCTL_VERSION}.tar.gz | tar Cxz ~/ - cd ~/nerdctl-${NERDCTL_VERSION} - echo "replace github.com/rootless-containers/bypass4netns => /vagrant" >> go.mod - make - sudo rm -f /usr/local/bin/nerdctl - sudo cp _output/nerdctl /usr/local/bin/nerdctl - nerdctl info - nerdctl pull --quiet "${ALPINE_IMAGE}" - - cd /vagrant - make - sudo rm -f /usr/local/bin/bypass4netns* - sudo make install - - hostname -I | awk '{print $1}' | tee /tmp/host_ip - /vagrant/test/seccomp.json.sh | tee /tmp/seccomp.json - - systemd-run --user --unit run-iperf3 iperf3 -s - ) - - echo "===== '--ignore' option test =====" - ( - set -x - systemd-run 
--user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug - nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c $(cat /tmp/host_ip) - # TODO: this check is dirty. we want better method to check the connect(2) is ignored. - journalctl --user -u run-bypass4netns.service | grep "is not bypassed" - nerdctl rm -f test - systemctl --user stop run-bypass4netns.service - - ) - - echo "===== connect(2),sendto(2) test =====" - ( - systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 - set -x - cd /vagrant/test - /bin/bash test.sh /tmp/seccomp.json $(cat /tmp/host_ip) - systemctl --user stop run-bypass4netns.service - ) - - echo "===== Test bypass4netnsd =====" - ( - set -x - source ~/.profile - /vagrant/test/test_b4nnd.sh - ) - - echo "===== Benchmark: netns -> host With bypass4netns =====" - ( - set -x - - # start bypass4netnsd for nerdctl integration - systemd-run --user --unit run-bypass4netnsd bypass4netnsd - sleep 1 - nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test - ) - - echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" - ( - set -x - nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test - ) - - echo "===== Benchmark: host -> netns With bypass4netns =====" - ( - set -x - nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 - sleep 1 # waiting `iperf3 -s -4` becomes 
ready - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test - ) - - echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" - ( - set -x - nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 - sleep 1 - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test - ) - - SHELL -end diff --git a/launch_test_lxc.sh b/launch_test_lxc.sh new file mode 100755 index 0000000..6f4e476 --- /dev/null +++ b/launch_test_lxc.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +cd $(dirname $0) + +# lxd init --auto --storage-backend=btrfs +sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test +sudo lxc config device add test share disk source=$(pwd) path=/host +sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" +# let user services running +sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && sudo loginctl enable-linger" diff --git a/test/Dockerfile b/test/Dockerfile index 1922566..b9a4029 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/docker/library/alpine:3.15 +FROM public.ecr.aws/docker/library/alpine:3.16 RUN apk add python3 diff --git a/test/init_test.sh b/test/init_test.sh new file mode 100755 index 0000000..0a0793d --- /dev/null +++ b/test/init_test.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +set -eu -o pipefail + +TEST_USER=ubuntu + +if [ "$(whoami)" != "$TEST_USER" ]; then + su $TEST_USER -c $0 + exit 0 +fi + +GO_VERSION="1.21.4" +NERDCTL_VERSION="1.7.0" + +echo "===== Prepare =====" +( + set -x + + sudo cp -r /host ~/bypass4netns + sudo chown -R $TEST_USER:$TEST_USER ~/bypass4netns + + sudo apt-get update + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables + systemctl --user start dbus + + curl -fsSL 
https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | sudo tar Cxz /usr/local + echo "export PATH=$PATH:/usr/local/go/bin" >> ~/.profile + source ~/.profile + + curl -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz | sudo tar Cxz /usr/local + containerd-rootless-setuptool.sh install + containerd-rootless-setuptool.sh install-buildkit + + containerd-rootless-setuptool.sh install-fuse-overlayfs + cat << EOF >> /home/$TEST_USER/.config/containerd/config.toml +[proxy_plugins] + [proxy_plugins."fuse-overlayfs"] + type = "snapshot" + address = "/run/user/1000/containerd-fuse-overlayfs.sock" +EOF + + systemctl restart --user containerd + echo 'export CONTAINERD_SNAPSHOTTER="fuse-overlayfs"' >> ~/.profile + source ~/.profile + + # build nerdctl with bypass4netns + curl -fsSL https://github.com/containerd/nerdctl/archive/refs/tags/v${NERDCTL_VERSION}.tar.gz | tar Cxz ~/ + cd ~/nerdctl-${NERDCTL_VERSION} + echo "replace github.com/rootless-containers/bypass4netns => /home/$TEST_USER/bypass4netns" >> go.mod + make + sudo rm -f /usr/local/bin/nerdctl + sudo cp _output/nerdctl /usr/local/bin/nerdctl + nerdctl info + + cd ~/bypass4netns + make + sudo rm -f /usr/local/bin/bypass4netns* + sudo make install + + hostname -I | awk '{print $1}' | tee /tmp/host_ip + ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json + + + + systemd-run --user --unit run-iperf3 iperf3 -s +) \ No newline at end of file diff --git a/test/lxd.yaml b/test/lxd.yaml new file mode 100644 index 0000000..26fc8b0 --- /dev/null +++ b/test/lxd.yaml @@ -0,0 +1,38 @@ +config: {} +networks: +- config: + ipv4.address: 192.168.6.1/24 + ipv4.nat: "true" + ipv6.address: none + description: "" + name: lxdbr0 + type: bridge + project: default +storage_pools: +- config: + size: 30GiB + source: /var/snap/lxd/common/lxd/disks/default.img + description: "" + name: default + driver: btrfs +profiles: +- config: {} + description: Default 
LXD profile + devices: + eth0: + name: eth0 + network: lxdbr0 + type: nic + root: + path: / + pool: default + type: disk + name: default +projects: +- config: + features.images: "true" + features.networks: "true" + features.profiles: "true" + features.storage.volumes: "true" + description: Default LXD project + name: default \ No newline at end of file diff --git a/test/run_test.sh b/test/run_test.sh new file mode 100755 index 0000000..ba7e170 --- /dev/null +++ b/test/run_test.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +set -eu -o pipefail + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" +nerdctl pull --quiet "${ALPINE_IMAGE}" + +SCRIPT_DIR=$(cd $(dirname $0); pwd) +cd $SCRIPT_DIR + +echo "===== '--ignore' option test =====" +( + set -x + systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug + nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $(cat /tmp/host_ip) -t 1 + # TODO: this check is dirty. we want better method to check the connect(2) is ignored. + journalctl --user -u run-bypass4netns.service | grep "is not bypassed" + nerdctl rm -f test + systemctl --user stop run-bypass4netns.service +) + +# nerdctl image build not working. 
+#[+] Building 10.1s (2/2) FINISHED +# => [internal] load build definition from Dockerfile 0.0s +# => => transferring dockerfile: 274B 0.0s +# => ERROR [internal] load metadata for public.ecr.aws/docker/library/alpine:3.16 10.0s +#------ +# > [internal] load metadata for public.ecr.aws/docker/library/alpine:3.16: +#------ +#Dockerfile:1 +#-------------------- +# 1 | >>> FROM public.ecr.aws/docker/library/alpine:3.16 +# 2 | +# 3 | RUN apk add python3 +#-------------------- +#error: failed to solve: public.ecr.aws/docker/library/alpine:3.16: failed to do request: Head "https://public.ecr.aws/v2/docker/library/alpine/manifests/3.16": dial tcp: lookup public.ecr.aws on 10.0.2.3:53: read udp 10.0.2.100:47105->10.0.2.3:53: i/o timeout +#echo "===== connect(2),sendto(2) test =====" +#( +# systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 +# set -x +# cd $SCRIPT_DIR/test +# /bin/bash test_syscalls.sh /tmp/seccomp.json $(cat /tmp/host_ip) +# systemctl --user stop run-bypass4netns.service +#) + +echo "===== Test bypass4netnsd =====" +( + set -x + source ~/.profile + ./test_b4nnd.sh +) + +echo "===== Benchmark: netns -> host With bypass4netns =====" +( + set -x + + # start bypass4netnsd for nerdctl integration + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" + nerdctl rm -f test +) + +echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" +( + set -x + nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns With bypass4netns =====" +( + set -x + nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 
"${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 + sleep 1 # waiting `iperf3 -s -4` becomes ready + iperf3 -c "$(cat /tmp/host_ip)" -p 8080 + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" +( + set -x + nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 + sleep 1 + iperf3 -c "$(cat /tmp/host_ip)" -p 8080 + nerdctl rm -f test +) \ No newline at end of file diff --git a/test/test.sh b/test/test_syscalls.sh similarity index 100% rename from test/test.sh rename to test/test_syscalls.sh From 8b035864a0b4bb5fa42aa0d90086f39efaf7d10d Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 22 Nov 2023 19:29:46 +0000 Subject: [PATCH 17/55] add option for c2c communications Signed-off-by: Naoki MATSUMOTO --- cmd/bypass4netns/main.go | 15 +++++++++++++-- cmd/bypass4netnsd/main.go | 13 +++++++++++++ pkg/bypass4netns/bypass4netns.go | 26 +++++++++++++++++--------- pkg/bypass4netns/socket.go | 5 +++-- pkg/bypass4netnsd/bypass4netnsd.go | 4 ++++ 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index 55ee296..946b7b9 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -55,6 +55,7 @@ func main() { help := flag.Bool("help", false, "Show help") nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide tracerAgentFlag := flag.Bool("tracer-agent", false, "(An internal flag. 
Do not use manually.)") // TODO: hide + handleC2cEnable := flag.Bool("handle-c2c-connections", false, "Handle connections between containers") tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") @@ -108,12 +109,18 @@ func main() { os.Exit(0) } - if *tracerEnable { + if *handleC2cEnable { if comSocketFile == "" { logrus.Fatal("--com-socket is not specified") } } + if *tracerEnable { + if !*handleC2cEnable { + logrus.Fatal("--handle-c2c-connections is not enabled") + } + } + if *multinodeEnable { if multinodeEtcdAddress == "" { logrus.Fatal("--multinode-etcd-address is not specified") @@ -229,10 +236,14 @@ func main() { os.Exit(0) }() + c2cConfig := &bypass4netns.C2CConnectionHandleConfig{ + Enable: *handleC2cEnable, + TracerEnable: *tracerEnable, + } multinode := &bypass4netns.MultinodeConfig{ Enable: *multinodeEnable, EtcdAddress: multinodeEtcdAddress, HostAddress: multinodeHostAddress, } - handler.StartHandle(*tracerEnable, multinode) + handler.StartHandle(c2cConfig, multinode) } diff --git a/cmd/bypass4netnsd/main.go b/cmd/bypass4netnsd/main.go index 708cf10..9e2aed2 100644 --- a/cmd/bypass4netnsd/main.go +++ b/cmd/bypass4netnsd/main.go @@ -50,6 +50,7 @@ func main() { flag.StringVar(&multinodeEtcdAddress, "multinode-etcd-address", "", "Etcd address for multinode communication") flag.StringVar(&multinodeHostAddress, "multinode-host-address", "", "Host address for multinode communication") tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") + handleC2cEnable := flag.Bool("handle-c2c-connections", false, "Handle connections between containers") multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") debug := flag.Bool("debug", false, "Enable debug mode") version := flag.Bool("version", false, "Show version") @@ -114,7 +115,19 @@ func main() { b4nsdDriver := bypass4netnsd.NewDriver(b4nnPath, comSocketFile) + if 
*handleC2cEnable && *multinodeEnable { + logrus.Fatal("--handle-c2c-connections and multinode cannot be enabled at the sametime") + } + + if *handleC2cEnable { + logrus.Info("Handling connections between containers") + b4nsdDriver.HandleC2CEnable = *handleC2cEnable + } + if *tracerEnable { + if !*handleC2cEnable { + logrus.Fatal("--handle-c2c-connections is not enabled") + } logrus.Info("Connection tracer is enabled") b4nsdDriver.TracerEnable = *tracerEnable } diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index b43016f..e6b6ade 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -462,6 +462,11 @@ type MultinodeConfig struct { etcdKeyApi client.KeysAPI } +type C2CConnectionHandleConfig struct { + Enable bool + TracerEnable bool +} + type notifHandler struct { fd libseccomp.ScmpFd state *specs.ContainerProcessState @@ -474,8 +479,7 @@ type notifHandler struct { // key is destination address e.g. "192.168.1.1:1000" containerInterfaces map[string]containerInterface - tracer *tracer.Tracer - tracerEnable bool + c2cConnections *C2CConnectionHandleConfig multinode *MultinodeConfig } @@ -505,7 +509,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState } // StartHandle starts seccomp notif handler -func (h *Handler) StartHandle(enableTracer bool, multinodeConfig *MultinodeConfig) { +func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeConfig *MultinodeConfig) { logrus.Info("Waiting for seccomp file descriptors") l, err := net.Listen("unix", h.socketPath) if err != nil { @@ -544,7 +548,7 @@ func (h *Handler) StartHandle(enableTracer bool, multinodeConfig *MultinodeConfi logrus.Infof("Received new seccomp fd: %v", newFd) notifHandler := h.newNotifHandler(newFd, state) - notifHandler.tracerEnable = enableTracer + notifHandler.c2cConnections = c2cConfig notifHandler.multinode = multinodeConfig if notifHandler.multinode.Enable { 
notifHandler.multinode.etcdClientConfig = client.Config{ @@ -560,8 +564,8 @@ func (h *Handler) StartHandle(enableTracer bool, multinodeConfig *MultinodeConfi } // prepare tracer agent - if notifHandler.tracerEnable && !notifHandler.multinode.Enable { err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) + if c2cConfig.TracerEnable && !multinodeConfig.Enable && tracerAgent == nil { if err != nil { logrus.WithError(err).Fatalf("failed to start tracer") } @@ -627,7 +631,7 @@ func (h *notifHandler) startBackgroundTracerTask(comSocketPath string) { for _, v := range h.forwardingPorts { containerIfs.ForwardingPorts[v.ChildPort] = v.HostPort } - logrus.Infof("Interfaces = %v", containerIfs) + logrus.Debugf("Interfaces = %v", containerIfs) _, err = comClient.PostInterface(gocontext.TODO(), containerIfs) if err != nil { logrus.WithError(err).Errorf("failed to post interfaces") @@ -659,8 +663,8 @@ func (h *notifHandler) startBackgroundTracerTask(comSocketPath string) { containerIf[dstAddr] = contIf continue } - if h.tracerEnable { - addrRes, err := h.tracer.ConnectToAddress([]string{dstAddr}) + if h.c2cConnections.TracerEnable { + addrRes, err := tracerAgent.ConnectToAddress([]string{dstAddr}) if err != nil { logrus.WithError(err).Debugf("failed to connect to %s", dstAddr) continue @@ -676,7 +680,7 @@ func (h *notifHandler) startBackgroundTracerTask(comSocketPath string) { hostPort: hostPort, lastCheckedUnix: time.Now().Unix(), } - logrus.Debugf("%s -> 127.0.0.1:%d is registered", dstAddr, hostPort) + logrus.Infof("%s -> 127.0.0.1:%d is registered", dstAddr, hostPort) } } } @@ -716,6 +720,10 @@ func (h *notifHandler) startBackgroundMultinodeTask() { } else { logrus.Infof("Registered %s -> %s", containerAddr, hostAddr) } + err = h.multinode.etcdClient.Sync(gocontext.TODO()) + if err != nil { + logrus.WithError(err).Errorf("failed to sync etcdClient") + } } } } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index b76e640..0c663f3 100644 --- 
a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -178,9 +178,10 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { if handler.multinode.Enable && destAddr.IP.IsPrivate() { // currently, only private addresses are available in multinode communication. + key := ETCD_MULTINODE_PREFIX + destAddr.String() res, err := handler.multinode.etcdKeyApi.Get(gocontext.TODO(), ETCD_MULTINODE_PREFIX+destAddr.String(), nil) if err != nil { - ss.logger.Warnf("destination address %q is not registered", destAddr.String()) + ss.logger.WithError(err).Warnf("destination address %q is not registered", key) } else { hostAddrWithPort := res.Node.Value hostAddrs := strings.Split(hostAddrWithPort, ":") @@ -201,7 +202,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { connectToOtherBypassedContainer = true ss.logger.Infof("destination address %v is container address and bypassed via overlay network", destAddr) } - } else { + } else if handler.c2cConnections.Enable { contIf, ok := handler.containerInterfaces[destAddr.String()] if ok { ss.logger.Infof("destination address %v is container address and bypassed", destAddr) diff --git a/pkg/bypass4netnsd/bypass4netnsd.go b/pkg/bypass4netnsd/bypass4netnsd.go index 66ce2c2..bf17c77 100644 --- a/pkg/bypass4netnsd/bypass4netnsd.go +++ b/pkg/bypass4netnsd/bypass4netnsd.go @@ -23,6 +23,7 @@ type Driver struct { lock sync.RWMutex containerInterfaces map[string]com.ContainerInterfaces interfacesLock sync.RWMutex + HandleC2CEnable bool TracerEnable bool MultinodeEnable bool MultinodeEtcdAddress string @@ -87,6 +88,9 @@ func (d *Driver) StartBypass(spec *api.BypassSpec) (*api.BypassStatus, error) { } b4nnArgs = append(b4nnArgs, fmt.Sprintf("--com-socket=%s", d.ComSocketPath)) + if d.HandleC2CEnable { + b4nnArgs = append(b4nnArgs, "--handle-c2c-connections") + } if d.TracerEnable { b4nnArgs = append(b4nnArgs, "--tracer=true") } From 5d94ce9cc95ccfbd35cd7ac925b0842263ddac1a Mon 
Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 22 Nov 2023 19:30:12 +0000 Subject: [PATCH 18/55] run only one tracer agent Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 21 +++++++++++++-------- pkg/bypass4netns/tracer/tracer.go | 7 +++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index e6b6ade..ac8eb91 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -495,7 +495,6 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState state: state, forwardingPorts: map[int]ForwardPortMapping{}, processes: map[uint32]*processStatus{}, - tracer: tracer.NewTracer(h.tracerAgentLogPath), } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate @@ -526,6 +525,9 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon syscall.Close(h.readyFd) } + // prepare tracer agent + var tracerAgent *tracer.Tracer = nil + for { conn, err := l.Accept() logrus.Info("accept connection") @@ -563,9 +565,11 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon notifHandler.multinode.etcdKeyApi = client.NewKeysAPI(notifHandler.multinode.etcdClient) } - // prepare tracer agent - err = notifHandler.tracer.StartTracer(gocontext.TODO(), state.Pid) + // not to run multiple tracerAgent. 
+ // TODO: prepare only one tracerAgent in Handler if c2cConfig.TracerEnable && !multinodeConfig.Enable && tracerAgent == nil { + tracerAgent = tracer.NewTracer(h.tracerAgentLogPath) + err = tracerAgent.StartTracer(gocontext.TODO(), state.Pid) if err != nil { logrus.WithError(err).Fatalf("failed to start tracer") } @@ -573,7 +577,7 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon for _, v := range notifHandler.forwardingPorts { fwdPorts = append(fwdPorts, v.ChildPort) } - err = notifHandler.tracer.RegisterForwardPorts(fwdPorts) + err = tracerAgent.RegisterForwardPorts(fwdPorts) if err != nil { logrus.WithError(err).Fatalf("failed to register port") } @@ -582,7 +586,7 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon // check tracer agent is ready for _, v := range fwdPorts { dst := fmt.Sprintf("127.0.0.1:%d", v) - addr, err := notifHandler.tracer.ConnectToAddress([]string{dst}) + addr, err := tracerAgent.ConnectToAddress([]string{dst}) if err != nil { logrus.WithError(err).Warnf("failed to connect to %s", dst) continue @@ -598,16 +602,17 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon logrus.Infof("tracer is disabled") } + // TODO: these goroutines shoud be launched only once. 
if notifHandler.multinode.Enable { go notifHandler.startBackgroundMultinodeTask() - } else if notifHandler.tracerEnable { - go notifHandler.startBackgroundTracerTask(h.comSocketPath) + } else if notifHandler.c2cConnections.Enable { + go notifHandler.startBackgroundC2CConnectionHandleTask(h.comSocketPath, tracerAgent) } go notifHandler.handle() } } -func (h *notifHandler) startBackgroundTracerTask(comSocketPath string) { +func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath string, tracerAgent *tracer.Tracer) { logrus.Info("Started bypass4netns background task") comClient, err := com.NewComClient(comSocketPath) if err != nil { diff --git a/pkg/bypass4netns/tracer/tracer.go b/pkg/bypass4netns/tracer/tracer.go index b40f1dd..97d1608 100644 --- a/pkg/bypass4netns/tracer/tracer.go +++ b/pkg/bypass4netns/tracer/tracer.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "strconv" + "sync" "github.com/rootless-containers/bypass4netns/pkg/util" "golang.org/x/sys/unix" @@ -18,11 +19,14 @@ type Tracer struct { tracerCmd *exec.Cmd reader io.Reader writer io.Writer + + lock sync.Mutex } func NewTracer(logPath string) *Tracer { return &Tracer{ logPath: logPath, + lock: sync.Mutex{}, } } @@ -97,6 +101,9 @@ func (x *Tracer) RegisterForwardPorts(ports []int) error { } func (x *Tracer) ConnectToAddress(addrs []string) ([]string, error) { + x.lock.Lock() + defer x.lock.Unlock() + cmd := TracerCommand{ Cmd: ConnectToAddress, DestinationAddress: addrs, From caf9bdca0a38ac9e93362cc2d00f9289b207a4cf Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 22 Nov 2023 19:30:22 +0000 Subject: [PATCH 19/55] Add tests Signed-off-by: Naoki MATSUMOTO --- test/init_test.sh | 12 +++-- test/run_test.sh | 115 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 121 insertions(+), 6 deletions(-) diff --git a/test/init_test.sh b/test/init_test.sh index 0a0793d..b0d2691 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -20,7 +20,12 @@ echo "===== Prepare =====" sudo 
chown -R $TEST_USER:$TEST_USER ~/bypass4netns sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd + sudo systemctl stop etcd + sudo systemctl disable etcd + HOST_IP=$(hostname -I | sed 's/ //') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemctl --user start dbus curl -fsSL https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz | sudo tar Cxz /usr/local @@ -60,7 +65,4 @@ EOF hostname -I | awk '{print $1}' | tee /tmp/host_ip ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json - - - systemd-run --user --unit run-iperf3 iperf3 -s -) \ No newline at end of file +) diff --git a/test/run_test.sh b/test/run_test.sh index ba7e170..512763b 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -2,15 +2,36 @@ set -eu -o pipefail +source ~/.profile + ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" nerdctl pull --quiet "${ALPINE_IMAGE}" SCRIPT_DIR=$(cd $(dirname $0); pwd) cd $SCRIPT_DIR +rm -rf ~/bypass4netns +sudo cp -r /host ~/bypass4netns +sudo chown -R ubuntu:ubuntu ~/bypass4netns +cd ~/bypass4netns +rm -f bypass4netns bypass4netnsd +make +sudo make install +cd $SCRIPT_DIR + +set +e +systemctl --user stop run-iperf3 +set -e + +systemd-run --user --unit run-iperf3 iperf3 -s + echo "===== '--ignore' option test =====" ( - set -x + set +e + systemctl --user stop run-bypass4netns + nerdctl rm -f test + set -ex + systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity nerdctl exec test apk add --no-cache iperf3 @@ -52,6 
+73,98 @@ echo "===== Test bypass4netnsd =====" ./test_b4nnd.sh ) +echo "===== tracer test (disabled) =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --handle-c2c-connections=true + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. + + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd +) + +echo "===== tracer test (enabled) =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --handle-c2c-connections=true --tracer=true --debug + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + set +e + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must 
not success to connect. + if [ $? -eq 0 ]; then + echo "tracer seems not working" + exit 1 + fi + set -e + + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd +) + + +echo "===== multinode test (single node) ====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user reset-failed + set -ex + + HOST_IP=$(hostname -I | sed 's/ //') + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP --debug + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test1 apk add --no-cache iperf3 + TEST1_ADDR=$(nerdctl exec test1 hostname -i) + systemd-run --user --unit run-test1-iperf3 nerdctl exec test1 iperf3 -s + nerdctl network create --subnet "10.4.1.0/24" net-2 + nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test2 apk add --no-cache iperf3 + # wait the key is propagated to etcd + # TODO: why it takes so much time? + sleep 15 + nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. 
+ + nerdctl rm -f test1 + nerdctl rm -f test2 + nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd +) + echo "===== Benchmark: netns -> host With bypass4netns =====" ( set -x From 9f60cb233ae1431d0cd7107d2734ddc53f0954a3 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 23 Nov 2023 11:19:46 +0000 Subject: [PATCH 20/55] check interfaces via iproute2 Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 73 +++++-- pkg/bypass4netns/iproute2/iproute2.go | 88 ++++++++ pkg/bypass4netns/iproute2/iproute2_test.go | 190 ++++++++++++++++++ .../nonbypassable/nonbypassable.go | 88 +++----- pkg/bypass4netns/nsagent/nsagent.go | 3 +- pkg/bypass4netns/nsagent/types/types.go | 5 +- pkg/bypass4netns/socket.go | 2 +- test/run_test.sh | 93 ++++++--- 8 files changed, 430 insertions(+), 112 deletions(-) create mode 100644 pkg/bypass4netns/iproute2/iproute2.go create mode 100644 pkg/bypass4netns/iproute2/iproute2_test.go diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index ac8eb91..3be1766 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -16,6 +16,7 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" "github.com/oraoto/go-pidfd" "github.com/rootless-containers/bypass4netns/pkg/api/com" + "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/iproute2" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nonbypassable" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" libseccomp "github.com/seccomp/libseccomp-golang" @@ -472,7 +473,9 @@ type notifHandler struct { state *specs.ContainerProcessState nonBypassable *nonbypassable.NonBypassable nonBypassableAutoUpdate bool - forwardingPorts map[int]ForwardPortMapping + + // key is child port + forwardingPorts map[int]ForwardPortMapping // key is pid processes map[uint32]*processStatus @@ -625,9 +628,17 @@ func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath stri 
logrus.Infof("Successfully connected to bypass4netnsd") ifLastUpdateUnix := int64(0) for { - lastUpdated := h.nonBypassable.GetLastUpdateUnix() - if lastUpdated > ifLastUpdateUnix { - ifs := h.nonBypassable.GetInterfaces() + if ifLastUpdateUnix+10 < time.Now().Unix() { + addrs, err := iproute2.GetAddressesInNetNS(gocontext.TODO(), h.state.Pid) + if err != nil { + logrus.WithError(err).Errorf("failed to get addresses") + return + } + ifs, err := iproute2AddressesToComInterfaces(addrs) + if err != nil { + logrus.WithError(err).Errorf("failed to convert addresses") + return + } containerIfs := &com.ContainerInterfaces{ ContainerID: h.state.State.ID, Interfaces: ifs, @@ -642,7 +653,7 @@ func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath stri logrus.WithError(err).Errorf("failed to post interfaces") } else { logrus.Infof("successfully posted updated interfaces") - ifLastUpdateUnix = lastUpdated + ifLastUpdateUnix = time.Now().Unix() } } containerInterfaces, err := comClient.ListInterfaces(gocontext.TODO()) @@ -696,23 +707,55 @@ func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath stri } } +func iproute2AddressesToComInterfaces(addrs iproute2.Addresses) ([]com.Interface, error) { + comIntfs := []com.Interface{} + for _, intf := range addrs { + comIntf := com.Interface{ + Name: intf.IfName, + Addresses: []net.IPNet{}, + IsLoopback: intf.LinkType == "loopback", + } + hwAddr, err := net.ParseMAC(intf.Address) + if err != nil { + return nil, fmt.Errorf("failed to parse HWAddress: %w", err) + } + comIntf.HWAddr = hwAddr + for _, addr := range intf.AddrInfos { + ip, ipNet, err := net.ParseCIDR(fmt.Sprintf("%s/%d", addr.Local, addr.PrefixLen)) + if err != nil { + return nil, fmt.Errorf("failed to parse addr_info: %w", err) + } + ipNet.IP = ip + comIntf.Addresses = append(comIntf.Addresses, *ipNet) + } + + comIntfs = append(comIntfs, comIntf) + } + + return comIntfs, nil +} + func (h *notifHandler) startBackgroundMultinodeTask() { 
ifLastUpdateUnix := int64(0) for { - lastUpdated := h.nonBypassable.GetLastUpdateUnix() - if lastUpdated > ifLastUpdateUnix || ifLastUpdateUnix+10 < time.Now().Unix() { - ifs := h.nonBypassable.GetInterfaces() + if ifLastUpdateUnix+10 < time.Now().Unix() { + ifs, err := iproute2.GetAddressesInNetNS(gocontext.TODO(), h.state.Pid) + if err != nil { + logrus.WithError(err).Errorf("failed to get addresses") + return + } for _, intf := range ifs { - if intf.IsLoopback { + // ignore non-ethernet interface + if intf.LinkType != "ether" { continue } - for _, addr := range intf.Addresses { - // ignore IPv6 address - if addr.IP.To4() == nil { + for _, addr := range intf.AddrInfos { + // ignore non-IPv4 address + if addr.Family != "inet" { continue } for _, v := range h.forwardingPorts { - containerAddr := fmt.Sprintf("%s:%d", addr.IP, v.ChildPort) + containerAddr := fmt.Sprintf("%s:%d", addr.Local, v.ChildPort) hostAddr := fmt.Sprintf("%s:%d", h.multinode.HostAddress, v.HostPort) // Remove entries with timeout // TODO: Remove related entries when exiting. 
@@ -725,10 +768,6 @@ func (h *notifHandler) startBackgroundMultinodeTask() { } else { logrus.Infof("Registered %s -> %s", containerAddr, hostAddr) } - err = h.multinode.etcdClient.Sync(gocontext.TODO()) - if err != nil { - logrus.WithError(err).Errorf("failed to sync etcdClient") - } } } } diff --git a/pkg/bypass4netns/iproute2/iproute2.go b/pkg/bypass4netns/iproute2/iproute2.go new file mode 100644 index 0000000..9496278 --- /dev/null +++ b/pkg/bypass4netns/iproute2/iproute2.go @@ -0,0 +1,88 @@ +package iproute2 + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "strconv" + + "github.com/rootless-containers/bypass4netns/pkg/util" + "golang.org/x/sys/unix" +) + +type AddrInfo struct { + Family string `json:"family"` + Local string `json:"local"` + PrefixLen int `json:"prefixlen"` + Broadcast string `json:"broadcast"` + Scope string `json:"scope"` + Label string `json:"label"` + ValidLifeTime int `json:"valid_life_time"` + PreferredLifeTime int `json:"preferred_life_time"` +} + +type Interface struct { + IfIndex int `json:"ifindex"` + IfName string `json:"ifname"` + Flags []string `json:"flags"` + Mtu int `json:"mtu"` + Qdisc string `json:"noqueue"` + Operstate string `json:"operstate"` + Group string `json:"group"` + TxQLen int `json:"txqlen"` + LinkType string `json:"link_type"` + Address string `json:"address"` + Broadcast string `json:"broadcast"` + AddrInfos []AddrInfo `json:"addr_info"` +} + +type Addresses = []Interface + +func UnmarshalAddress(jsonAddrs []byte) (Addresses, error) { + var addrs = Addresses{} + + err := json.Unmarshal(jsonAddrs, &addrs) + if err != nil { + return nil, err + } + + return addrs, nil +} + +func GetAddressesInNetNS(ctx context.Context, pid int) (Addresses, error) { + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return nil, err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(pid), + "-F", + "-n", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(pid, selfPid) + if err != nil { + 
return nil, fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", "ip", "-j", "addr", "show") + cmd := exec.CommandContext(ctx, nsenter, nsenterFlags...) + cmd.SysProcAttr = &unix.SysProcAttr{ + Pdeathsig: unix.SIGTERM, + } + stdout, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to start %v: %w", cmd.Args, err) + } + + addrs, err := UnmarshalAddress(stdout) + if err != nil { + return nil, fmt.Errorf("failed to parse json: %w", err) + } + + return addrs, nil +} diff --git a/pkg/bypass4netns/iproute2/iproute2_test.go b/pkg/bypass4netns/iproute2/iproute2_test.go new file mode 100644 index 0000000..184d992 --- /dev/null +++ b/pkg/bypass4netns/iproute2/iproute2_test.go @@ -0,0 +1,190 @@ +package iproute2 + +import ( + "fmt" + "net" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnmarshalAddress(t *testing.T) { + testJson := ` +[ + { + "ifindex":1, + "ifname":"lo", + "flags":[ + "LOOPBACK", + "UP", + "LOWER_UP" + ], + "mtu":65536, + "qdisc":"noqueue", + "operstate":"UNKNOWN", + "group":"default", + "txqlen":1000, + "link_type":"loopback", + "address":"00:00:00:00:00:00", + "broadcast":"00:00:00:00:00:00", + "addr_info":[ + { + "family":"inet", + "local":"127.0.0.1", + "prefixlen":8, + "scope":"host", + "label":"lo", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + }, + { + "family":"inet6", + "local":"::1", + "prefixlen":128, + "scope":"host", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":2, + "ifname":"enp1s0", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"fq_codel", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"52:54:00:c3:92:b6", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + 
"local":"192.168.1.155", + "prefixlen":24, + "broadcast":"192.168.1.255", + "scope":"global", + "label":"enp1s0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + }, + { + "family":"inet6", + "local":"fe80::5054:ff:fec3:92b6", + "prefixlen":64, + "scope":"link", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":3, + "ifname":"docker0", + "flags":[ + "NO-CARRIER", + "BROADCAST", + "MULTICAST", + "UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "operstate":"DOWN", + "group":"default", + "link_type":"ether", + "address":"02:42:ab:c8:78:84", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + "local":"172.17.0.1", + "prefixlen":16, + "broadcast":"172.17.255.255", + "scope":"global", + "label":"docker0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":61, + "ifname":"lxdbr0", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"00:16:3e:4d:92:98", + "broadcast":"ff:ff:ff:ff:ff:ff", + "addr_info":[ + { + "family":"inet", + "local":"192.168.6.1", + "prefixlen":24, + "scope":"global", + "label":"lxdbr0", + "valid_life_time":4294967295, + "preferred_life_time":4294967295 + } + ] + }, + { + "ifindex":71, + "link_index":70, + "ifname":"veth71db11e7", + "flags":[ + "BROADCAST", + "MULTICAST", + "UP", + "LOWER_UP" + ], + "mtu":1500, + "qdisc":"noqueue", + "master":"lxdbr0", + "operstate":"UP", + "group":"default", + "txqlen":1000, + "link_type":"ether", + "address":"da:83:f0:97:c7:14", + "broadcast":"ff:ff:ff:ff:ff:ff", + "link_netnsid":0, + "addr_info":[ + + ] + } +] + ` + + addrs, err := UnmarshalAddress([]byte(testJson)) + assert.Equal(t, nil, err) + assert.Equal(t, 5, len(addrs)) + intf := addrs[1] + assert.Equal(t, "UP", intf.Operstate) + assert.Equal(t, "ether", intf.LinkType) + assert.Equal(t, 2, 
len(intf.AddrInfos)) + addr := intf.AddrInfos[0] + assert.Equal(t, "inet", addr.Family) + assert.Equal(t, "192.168.1.155", addr.Local) + addrIp, addrCidr, err := net.ParseCIDR(fmt.Sprintf("%s/%d", addr.Local, addr.PrefixLen)) + assert.Equal(t, nil, err) + addrCidr.IP = addrIp + assert.Equal(t, "192.168.1.155/24", addrCidr.String()) + addr2 := intf.AddrInfos[1] + assert.Equal(t, "inet6", addr2.Family) + assert.Equal(t, "fe80::5054:ff:fec3:92b6", addr2.Local) +} diff --git a/pkg/bypass4netns/nonbypassable/nonbypassable.go b/pkg/bypass4netns/nonbypassable/nonbypassable.go index 7c3b430..cc0bf61 100644 --- a/pkg/bypass4netns/nonbypassable/nonbypassable.go +++ b/pkg/bypass4netns/nonbypassable/nonbypassable.go @@ -13,9 +13,7 @@ import ( "os/signal" "strconv" "sync" - "time" - "github.com/rootless-containers/bypass4netns/pkg/api/com" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nsagent/types" "github.com/rootless-containers/bypass4netns/pkg/util" "github.com/sirupsen/logrus" @@ -32,11 +30,9 @@ func New(staticList []net.IPNet) *NonBypassable { // NonBypassable maintains the list of the non-bypassable CIDRs, // such as 127.0.0.0/8 and CNI bridge CIDRs in the slirp's network namespace. type NonBypassable struct { - staticList []net.IPNet - dynamicList []net.IPNet - interfaces []com.Interface - lastUpdateUnix int64 - mu sync.RWMutex + staticList []net.IPNet + dynamicList []net.IPNet + mu sync.RWMutex } func (x *NonBypassable) Contains(ip net.IP) bool { @@ -50,32 +46,32 @@ func (x *NonBypassable) Contains(ip net.IP) bool { return false } -func (x *NonBypassable) IsInterfaceIPAddress(ip net.IP) bool { - x.mu.RLock() - defer x.mu.RUnlock() - for _, intf := range x.interfaces { - for _, intfIP := range intf.Addresses { - if intfIP.IP.Equal(ip) { - return true - } - } - } - - return false -} - -func (x *NonBypassable) GetInterfaces() []com.Interface { - x.mu.RLock() - defer x.mu.RUnlock() - ips := append([]com.Interface{}, x.interfaces...) 
- return ips -} - -func (x *NonBypassable) GetLastUpdateUnix() int64 { - x.mu.RLock() - defer x.mu.RUnlock() - return x.lastUpdateUnix -} +//func (x *NonBypassable) IsInterfaceIPAddress(ip net.IP) bool { +// x.mu.RLock() +// defer x.mu.RUnlock() +// for _, intf := range x.interfaces { +// for _, intfIP := range intf.Addresses { +// if intfIP.IP.Equal(ip) { +// return true +// } +// } +// } +// +// return false +//} +// +//func (x *NonBypassable) GetInterfaces() []com.Interface { +// x.mu.RLock() +// defer x.mu.RUnlock() +// ips := append([]com.Interface{}, x.interfaces...) +// return ips +//} +// +//func (x *NonBypassable) GetLastUpdateUnix() int64 { +// x.mu.RLock() +// defer x.mu.RUnlock() +// return x.lastUpdateUnix +//} // WatchNS watches the NS associated with the PID and updates the internal dynamic list on receiving SIGHUP. func (x *NonBypassable) WatchNS(ctx context.Context, pid int) error { @@ -138,15 +134,9 @@ func (x *NonBypassable) watchNS(r io.Reader) { continue } var newList []net.IPNet - var newInterfaces []com.Interface for _, intf := range msg.Interfaces { - i := com.Interface{ - Name: intf.Name, - Addresses: make([]net.IPNet, 0), - IsLoopback: false, - } for _, cidr := range intf.CIDRs { - ip, ipNet, err := net.ParseCIDR(cidr) + _, ipNet, err := net.ParseCIDR(cidr) if err != nil { logrus.WithError(err).Warnf("Dynamic non-bypassable list: Failed to parse nsagent message %q: %q: bad CIDR %q", line, intf.Name, cidr) continue @@ -154,29 +144,11 @@ func (x *NonBypassable) watchNS(r io.Reader) { if ipNet != nil { newList = append(newList, *ipNet) } - if ip.IsLoopback() { - i.IsLoopback = true - } - ifIPNet := net.IPNet{ - IP: ip, - Mask: ipNet.Mask, - } - i.Addresses = append(i.Addresses, ifIPNet) - } - if !i.IsLoopback { - var err error - i.HWAddr, err = net.ParseMAC(intf.HWAddr) - if err != nil { - logrus.WithError(err).Errorf("invalid hardware address %q ifName=%s is ignored", intf.HWAddr, intf.Name) - } } - newInterfaces = append(newInterfaces, i) } 
x.mu.Lock() logrus.Infof("Dynamic non-bypassable list: old dynamic=%v, new dynamic=%v, static=%v", x.dynamicList, newList, x.staticList) x.dynamicList = newList - x.interfaces = newInterfaces - x.lastUpdateUnix = time.Now().Unix() x.mu.Unlock() } if err := scanner.Err(); err != nil { diff --git a/pkg/bypass4netns/nsagent/nsagent.go b/pkg/bypass4netns/nsagent/nsagent.go index 0dc68c4..1ff4d31 100644 --- a/pkg/bypass4netns/nsagent/nsagent.go +++ b/pkg/bypass4netns/nsagent/nsagent.go @@ -47,8 +47,7 @@ func inspect(w io.Writer) error { continue } entry := types.Interface{ - Name: intf.Name, - HWAddr: intf.HardwareAddr.String(), + Name: intf.Name, } for _, addr := range addrs { if ipNet, ok := addr.(*net.IPNet); ok { diff --git a/pkg/bypass4netns/nsagent/types/types.go b/pkg/bypass4netns/nsagent/types/types.go index f9e5c36..62158b7 100644 --- a/pkg/bypass4netns/nsagent/types/types.go +++ b/pkg/bypass4netns/nsagent/types/types.go @@ -5,7 +5,6 @@ type Message struct { } type Interface struct { - Name string `json:"name"` // "lo", "eth0", etc. - HWAddr string `json:"hwAddr"` - CIDRs []string `json:"cidrs"` // sorted as strings + Name string `json:"name"` // "lo", "eth0", etc. 
+ CIDRs []string `json:"cidrs"` // sorted as strings } diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 0c663f3..c9c5839 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -170,7 +170,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { if destAddr.IP.IsLoopback() { ss.logger.Infof("destination address %v is loopback and bypassed", destAddr) connectToLoopback = true - } else if handler.nonBypassable.IsInterfaceIPAddress(destAddr.IP) { + } else if contIf, ok := handler.containerInterfaces[destAddr.String()]; ok && contIf.containerID == handler.state.State.ID { ss.logger.Infof("destination address %v is interface's address and bypassed", destAddr) connectToInterface = true } diff --git a/test/run_test.sh b/test/run_test.sh index 512763b..cc95b75 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -10,9 +10,22 @@ nerdctl pull --quiet "${ALPINE_IMAGE}" SCRIPT_DIR=$(cd $(dirname $0); pwd) cd $SCRIPT_DIR -rm -rf ~/bypass4netns -sudo cp -r /host ~/bypass4netns -sudo chown -R ubuntu:ubuntu ~/bypass4netns +set +u + +if [ ! -v 1 ]; then + echo "COPY" + rm -rf ~/bypass4netns + sudo cp -r /host ~/bypass4netns + sudo chown -R ubuntu:ubuntu ~/bypass4netns + cd ~/bypass4netns + exec $0 "FORK" + exit 0 +fi + +set -u + +echo "THIS IS FORK" + cd ~/bypass4netns rm -f bypass4netns bypass4netnsd make @@ -21,6 +34,8 @@ cd $SCRIPT_DIR set +e systemctl --user stop run-iperf3 +systemctl --user reset-failed +sleep 1 set -e systemd-run --user --unit run-iperf3 iperf3 -s @@ -156,7 +171,6 @@ echo "===== multinode test (single node) ====" nerdctl exec test2 apk add --no-cache iperf3 # wait the key is propagated to etcd # TODO: why it takes so much time? - sleep 15 nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. 
nerdctl rm -f test1 @@ -167,44 +181,61 @@ echo "===== multinode test (single node) ====" echo "===== Benchmark: netns -> host With bypass4netns =====" ( - set -x + set +e + nerdctl rm -f test + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex - # start bypass4netnsd for nerdctl integration - systemd-run --user --unit run-bypass4netnsd bypass4netnsd - sleep 1 - nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test + # start bypass4netnsd for nerdctl integration + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" + nerdctl rm -f test ) echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" ( - set -x - nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test + set +e + nerdctl rm -f test + set -ex + + nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" + nerdctl rm -f test ) echo "===== Benchmark: host -> netns With bypass4netns =====" ( - set -x - nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 - sleep 1 # waiting `iperf3 -s -4` becomes ready - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns + systemctl --user reset-failed + set -ex + + nerdctl run --label 
nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 + sleep 1 # waiting `iperf3 -s -4` becomes ready + iperf3 -c "$(cat /tmp/host_ip)" -p 8080 + nerdctl rm -f test ) echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" ( - set -x - nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 - sleep 1 - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns2 + systemctl --user reset-failed + set -ex + + nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 + sleep 1 + iperf3 -c "$(cat /tmp/host_ip)" -p 8080 + nerdctl rm -f test ) \ No newline at end of file From 3b9044ff40cf7ffcbe9aeaa3ef17521002f56e05 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 23 Nov 2023 11:43:41 +0000 Subject: [PATCH 21/55] wait for background task registers interfaces Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 3be1766..409afd4 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -606,16 +606,24 @@ func (h *Handler) StartHandle(c2cConfig *C2CConnectionHandleConfig, multinodeCon } // TODO: these goroutines shoud be launched only once. 
+ ready := make(chan bool, 10) if notifHandler.multinode.Enable { - go notifHandler.startBackgroundMultinodeTask() + go notifHandler.startBackgroundMultinodeTask(ready) } else if notifHandler.c2cConnections.Enable { - go notifHandler.startBackgroundC2CConnectionHandleTask(h.comSocketPath, tracerAgent) + go notifHandler.startBackgroundC2CConnectionHandleTask(ready, h.comSocketPath, tracerAgent) + } else { + ready <- true } + + // wait for background tasks becoming ready + <-ready + logrus.Info("background task is ready. start to handle") go notifHandler.handle() } } -func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath string, tracerAgent *tracer.Tracer) { +func (h *notifHandler) startBackgroundC2CConnectionHandleTask(ready chan bool, comSocketPath string, tracerAgent *tracer.Tracer) { + initDone := false logrus.Info("Started bypass4netns background task") comClient, err := com.NewComClient(comSocketPath) if err != nil { @@ -703,6 +711,12 @@ func (h *notifHandler) startBackgroundC2CConnectionHandleTask(comSocketPath stri } h.containerInterfaces = containerIf + // once the interfaces are registered, it is ready to handle connections + if !initDone { + initDone = true + ready <- true + } + time.Sleep(1 * time.Second) } } @@ -735,7 +749,8 @@ func iproute2AddressesToComInterfaces(addrs iproute2.Addresses) ([]com.Interface return comIntfs, nil } -func (h *notifHandler) startBackgroundMultinodeTask() { +func (h *notifHandler) startBackgroundMultinodeTask(ready chan bool) { + initDone := false ifLastUpdateUnix := int64(0) for { if ifLastUpdateUnix+10 < time.Now().Unix() { @@ -772,7 +787,14 @@ func (h *notifHandler) startBackgroundMultinodeTask() { } } ifLastUpdateUnix = time.Now().Unix() + + // once the interfaces are registered, it is ready to handle connections + if !initDone { + initDone = true + ready <- true + } } + time.Sleep(1 * time.Second) } } From d9fddceefc538a83502072342a380e31dc8f2258 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: 
Thu, 23 Nov 2023 12:16:20 +0000 Subject: [PATCH 22/55] retry until success Signed-off-by: Naoki MATSUMOTO --- launch_test_lxc.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/launch_test_lxc.sh b/launch_test_lxc.sh index 6f4e476..4918361 100755 --- a/launch_test_lxc.sh +++ b/launch_test_lxc.sh @@ -7,4 +7,11 @@ sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test sudo lxc config device add test share disk source=$(pwd) path=/host sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" # let user services running -sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && sudo loginctl enable-linger" +# this sometimes fails, retry until success +RES=1 +while [ $RES -ne 0 ] +do + sleep 1 + sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sudo loginctl enable-linger" + RES=$? +done From e2a7c0ea73bcdec7e02ac47d9c68413494298a9e Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 23 Nov 2023 19:19:59 +0000 Subject: [PATCH 23/55] add benchmark with redis Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 2 + benchmark/redis.sh | 88 +++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100755 benchmark/redis.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1acf70c..319269c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -47,4 +47,6 @@ jobs: run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh - name: run tests run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/test/run_test.sh + - name: benchmark (redis) + run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/redis.sh diff --git a/benchmark/redis.sh b/benchmark/redis.sh new file mode 100755 index 0000000..d47fe9e --- /dev/null +++ b/benchmark/redis.sh @@ -0,0 +1,88 @@ +#!/bin/bash + + +set -eu -o pipefail + 
+REDIS_VERSION=7.2.3 +REDIS_IMAGE="redis:${REDIS_VERSION}" + +source ~/.profile + +nerdctl pull $REDIS_IMAGE + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f redis-server + nerdctl rm -f redis-client + set -ex + + nerdctl run -d --name redis-server "${REDIS_IMAGE}" + nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(nerdctl exec redis-server hostname -i) + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + nerdctl rm -f redis-server + nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f redis-server + nerdctl rm -f redis-client + set -ex + + nerdctl run -d -p 6379:6379 --name redis-server "${REDIS_IMAGE}" + nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(hostname -I) + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + nerdctl rm -f redis-server + nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + + # work around for https://github.com/naoki9911/bypass4netns/issues/1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' "${REDIS_IMAGE}" redis-server + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(hostname -I) + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + 
systemctl --user stop run-bypass4netnsd + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user reset-failed + set -ex + + HOST_IP=$(hostname -I | sed 's/ //') + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + # work around for https://github.com/naoki9911/bypass4netns/issues/1 + nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' "${REDIS_IMAGE}" redis-server + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(nerdctl exec redis-server hostname -i) + # without this, benchmark is not performed.(race condition?) + nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" + + nerdctl rm -f redis-server + nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd +) \ No newline at end of file From 71c01a17d1b016a7e0f4bb55adf9e360d95354d3 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 23 Nov 2023 19:22:19 +0000 Subject: [PATCH 24/55] fix comments Signed-off-by: Naoki MATSUMOTO --- benchmark/redis.sh | 4 ++-- test/run_test.sh | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmark/redis.sh b/benchmark/redis.sh index d47fe9e..863adf7 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -8,7 +8,7 @@ REDIS_IMAGE="redis:${REDIS_VERSION}" source ~/.profile -nerdctl pull $REDIS_IMAGE +nerdctl pull --quiet $REDIS_IMAGE echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( @@ -79,7 +79,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' "${REDIS_IMAGE}" redis-server nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client "${REDIS_IMAGE}" sleep infinity 
SERVER_IP=$(nerdctl exec redis-server hostname -i) - # without this, benchmark is not performed.(race condition?) + # without 'sleep 1', benchmark is not performed.(race condition?) nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" nerdctl rm -f redis-server diff --git a/test/run_test.sh b/test/run_test.sh index cc95b75..7685d58 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -169,8 +169,6 @@ echo "===== multinode test (single node) ====" nerdctl network create --subnet "10.4.1.0/24" net-2 nerdctl run --net net-2 --label nerdctl/bypass4netns=true -d --name test2 "${ALPINE_IMAGE}" sleep infinity nerdctl exec test2 apk add --no-cache iperf3 - # wait the key is propagated to etcd - # TODO: why it takes so much time? nerdctl exec test2 iperf3 -c $TEST1_ADDR -t 1 --connect-timeout 1000 # it must success to connect. nerdctl rm -f test1 From e575f7a34acca287cde53e9db974b2d3af6efee8 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 24 Nov 2023 18:30:30 +0000 Subject: [PATCH 25/55] add benchmark for multinode Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 15 +++++++--- benchmark/redis_multinode.sh | 55 ++++++++++++++++++++++++++++++++++ debug.sh | 39 ++++++++++++++++++++++++ enter.sh | 6 ++++ test/init_test.sh | 2 +- test/multinode.sh | 58 ++++++++++++++++++++++++++++++++++++ test/setup_vxlan.sh | 28 +++++++++++++++++ util.sh | 11 +++++++ 8 files changed, 209 insertions(+), 5 deletions(-) create mode 100755 benchmark/redis_multinode.sh create mode 100755 debug.sh create mode 100755 enter.sh create mode 100755 test/multinode.sh create mode 100755 test/setup_vxlan.sh create mode 100644 util.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 319269c..3391743 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -30,8 +30,9 @@ jobs: timeout-minutes: 20 steps: - uses: actions/checkout@v3.0.2 - #- name: install lxd (v5.19) - # run: sudo snap remove --purge lxd 
&& sudo snap install lxd --revision=26093 + - name: install lxd (v5.19) + run: sudo snap remove --purge lxd && sudo snap install lxd --revision=26093 + - run: sudo modprobe vxlan - name: configure lxd run: cat test/lxd.yaml | sudo lxd init --preseed && sudo sysctl -w net.ipv4.ip_forward=1 # thanks to https://andreas.scherbaum.la/post/2023-01-18_fix-lxc-network-issues-in-ubuntu-22.04/ @@ -39,14 +40,20 @@ jobs: run: sudo iptables -I DOCKER-USER -i lxdbr0 -o eth0 -j ACCEPT - name: Disable Docker Firewall 2 run: sudo iptables -I DOCKER-USER -o lxdbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT - - name: debug - run: sudo lxc network list && sudo iptables -t nat -L && sudo ufw status && sudo nft list table inet lxd + - name: Disable Docker Firewall 3 + run: sudo iptables -F FORWARD && sudo iptables -P FORWARD ACCEPT + #- name: debug + # run: ./debug.sh - name: launch lxc container run: ./launch_test_lxc.sh - name: install dependencies and build run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh - name: run tests run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/test/run_test.sh + - name: run multinode test + run: ./test/multinode.sh - name: benchmark (redis) run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/redis.sh + - name: multinode benchmark (redis) + run: ./benchmark/redis_multinode.sh diff --git a/benchmark/redis_multinode.sh b/benchmark/redis_multinode.sh new file mode 100755 index 0000000..0a26cbf --- /dev/null +++ b/benchmark/redis_multinode.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +cd $(dirname $0) +. 
../util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f redis-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +REDIS_VERSION=7.2.3 +REDIS_IMAGE="redis:${REDIS_VERSION}" + +set -eux -o pipefail + +NAME="test" exec_lxc nerdctl pull --quiet $REDIS_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-server -d $REDIS_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR + + NAME="test" exec_lxc nerdctl rm -f redis-server + NAME="test2" exec_lxc nerdctl rm -f redis-client +) + + +echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc 
systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' $REDIS_IMAGE redis-server" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec redis-server hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" + + NAME="test" exec_lxc nerdctl rm -f redis-server + NAME="test2" exec_lxc nerdctl rm -f redis-client +) \ No newline at end of file diff --git a/debug.sh b/debug.sh new file mode 100755 index 0000000..a10887b --- /dev/null +++ b/debug.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +sudo lxc rm -f test +sudo lxc rm -f test2 + +set -eux -o pipefail + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" + +cd $(dirname $0) +. 
./util.sh + +sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test +sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test2 + +sleep 5 + +TEST_ADDR=$(NAME="test" exec_lxc hostname -I) +TEST2_ADDR=$(NAME="test2" exec_lxc hostname -I) + +NAME="test" exec_lxc sudo apt install -y ethtool +NAME="test" exec_lxc sudo ip link add vxlan0 type vxlan id 100 noproxy nolearning remote $TEST2_ADDR dstport 4789 dev eth0 +NAME="test" exec_lxc sudo ethtool -K vxlan0 tx-checksum-ip-generic off +NAME="test" exec_lxc sudo ip a add $TEST1_VXLAN_ADDR/24 dev vxlan0 +NAME="test" exec_lxc sudo ip link set vxlan0 up + +NAME="test2" exec_lxc sudo apt install -y ethtool +NAME="test2" exec_lxc sudo ip link add vxlan0 type vxlan id 100 noproxy nolearning remote $TEST_ADDR dstport 4789 dev eth0 +NAME="test2" exec_lxc sudo ethtool -K vxlan0 tx-checksum-ip-generic off +NAME="test2" exec_lxc sudo ip a add $TEST2_VXLAN_ADDR/24 dev vxlan0 +NAME="test2" exec_lxc sudo ip link set vxlan0 up + +NAME="test" exec_lxc ping -c 5 $TEST2_VXLAN_ADDR + +sudo lxc rm -f test +sudo lxc rm -f test2 \ No newline at end of file diff --git a/enter.sh b/enter.sh new file mode 100755 index 0000000..eebe5ad --- /dev/null +++ b/enter.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -eux -o pipefail + +PID=$(nerdctl inspect $1 | jq '.[0].State.Pid') +nsenter -t $PID -F -U --preserve-credentials -n diff --git a/test/init_test.sh b/test/init_test.sh index b0d2691..9693dbf 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -20,7 +20,7 @@ echo "===== Prepare =====" sudo chown -R $TEST_USER:$TEST_USER ~/bypass4netns sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd jq tcpdump ethtool sudo systemctl stop etcd sudo systemctl 
disable etcd HOST_IP=$(hostname -I | sed 's/ //') diff --git a/test/multinode.sh b/test/multinode.sh new file mode 100755 index 0000000..d762b3d --- /dev/null +++ b/test/multinode.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +cd $(dirname $0) +. ../util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f vxlan +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +set -eux -o pipefail + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name vxlan -d $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh vxlan $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test" exec_lxc systemd-run --user --unit run-test-iperf3 nerdctl exec vxlan iperf3 -s + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name vxlan -d $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh vxlan $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test2" exec_lxc nerdctl exec vxlan iperf3 -c $TEST1_VXLAN_ADDR + + NAME="test" exec_lxc nerdctl rm -f vxlan + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f vxlan +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ 
bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name vxlan $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test" exec_lxc systemd-run --user --unit run-test-iperf3 nerdctl exec vxlan iperf3 -s + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec vxlan hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name vxlan $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec vxlan apk add --no-cache iperf3 + NAME="test2" exec_lxc nerdctl exec vxlan iperf3 -c $SERVER_IP + + NAME="test" exec_lxc nerdctl rm -f vxlan + NAME="test2" exec_lxc nerdctl rm -f vxlan +) diff --git a/test/setup_vxlan.sh b/test/setup_vxlan.sh new file mode 100755 index 0000000..0d144e6 --- /dev/null +++ b/test/setup_vxlan.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +cd $(dirname $0) +. 
../util.sh + +set -eux -o pipefail + +TARGET_CONTAINER=$1 +LOCAL_VXLAN_MAC=$2 +LOCAL_VXLAN_ADDR=$3 +REMOTE_ADDR=$4 +REMOTE_VXLAN_MAC=$5 +REMOTE_VXLAN_ADDR=$6 + +sleep 1 +# thanks to https://blog.tiqwab.com/2021/07/11/linux-network-vxlan.html +PID=$(nerdctl inspect $TARGET_CONTAINER | jq '.[0].State.Pid') + +PID=$PID exec_netns ip link add br0 type bridge +PID=$PID exec_netns ip a add $LOCAL_VXLAN_ADDR/24 dev br0 +PID=$PID exec_netns ip link set dev br0 address $LOCAL_VXLAN_MAC +PID=$PID exec_netns ip link set dev br0 up +PID=$PID exec_netns ip link add vxlan0 type vxlan id 100 noproxy nolearning remote $REMOTE_ADDR dstport 4789 dev eth0 +PID=$PID exec_netns ip link set vxlan0 master br0 +PID=$PID exec_netns ethtool -K vxlan0 tx-checksum-ip-generic off +PID=$PID exec_netns ip link set dev vxlan0 up +PID=$PID exec_netns ip neigh add $REMOTE_VXLAN_ADDR lladdr $REMOTE_VXLAN_MAC dev br0 +PID=$PID exec_netns bridge fdb add $REMOTE_VXLAN_MAC dev vxlan0 self dst $REMOTE_ADDR vni 100 port 4789 diff --git a/util.sh b/util.sh new file mode 100644 index 0000000..268f8c4 --- /dev/null +++ b/util.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eu -o pipefail + +function exec_netns() { + nsenter -t $PID -F -U --preserve-credentials -n -- "$@" +} + +function exec_lxc() { + sudo lxc exec $NAME -- sudo --login --user ubuntu "$@" +} \ No newline at end of file From 8ef1436f8ce0ccff521d29073dd5581b9937fc77 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sun, 26 Nov 2023 10:42:45 +0000 Subject: [PATCH 26/55] read/write non-default user's process mem via agent Signed-off-by: Naoki MATSUMOTO --- benchmark/redis.sh | 25 +++--- benchmark/redis_multinode.sh | 2 +- cmd/bypass4netns/main.go | 9 ++ pkg/bypass4netns/bypass4netns.go | 141 ++++++++++++++++++++++++++++--- pkg/bypass4netns/socket.go | 20 ++--- pkg/util/util.go | 55 ++++++++++++ 6 files changed, 217 insertions(+), 35 deletions(-) diff --git a/benchmark/redis.sh b/benchmark/redis.sh index 863adf7..f4b7545 100755 --- 
a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -32,10 +32,10 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl rm -f redis-client set -ex - nerdctl run -d -p 6379:6379 --name redis-server "${REDIS_IMAGE}" + nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity - SERVER_IP=$(hostname -I) - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + SERVER_IP=$(hostname -I | awk '{print $1}') + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 nerdctl rm -f redis-server nerdctl rm -f redis-client ) @@ -51,12 +51,10 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via systemd-run --user --unit run-bypass4netnsd bypass4netnsd - - # work around for https://github.com/naoki9911/bypass4netns/issues/1 - nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' "${REDIS_IMAGE}" redis-server - nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client "${REDIS_IMAGE}" sleep infinity - SERVER_IP=$(hostname -I) - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity + SERVER_IP=$(hostname -I | awk '{print $1}') + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 nerdctl rm -f redis-server nerdctl rm -f redis-client @@ -72,15 +70,14 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit systemctl --user reset-failed set -ex - HOST_IP=$(hostname -I | sed 's/ //') + HOST_IP=$(hostname -I | awk '{print $1}') systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP - # work around for 
https://github.com/naoki9911/bypass4netns/issues/1 - nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' "${REDIS_IMAGE}" redis-server - nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client "${REDIS_IMAGE}" sleep infinity + nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity SERVER_IP=$(nerdctl exec redis-server hostname -i) # without 'sleep 1', benchmark is not performed.(race condition?) - nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" + nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP -p 6379" nerdctl rm -f redis-server nerdctl rm -f redis-client diff --git a/benchmark/redis_multinode.sh b/benchmark/redis_multinode.sh index 0a26cbf..5c4d683 100755 --- a/benchmark/redis_multinode.sh +++ b/benchmark/redis_multinode.sh @@ -45,7 +45,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR - NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6379:6379 --name redis-server --entrypoint '' $REDIS_IMAGE redis-server" + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE" SERVER_IP=$(NAME="test" exec_lxc nerdctl exec 
redis-server hostname -i) NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" diff --git a/cmd/bypass4netns/main.go b/cmd/bypass4netns/main.go index 946b7b9..32c2a6e 100644 --- a/cmd/bypass4netns/main.go +++ b/cmd/bypass4netns/main.go @@ -55,6 +55,7 @@ func main() { help := flag.Bool("help", false, "Show help") nsagentFlag := flag.Bool("nsagent", false, "(An internal flag. Do not use manually.)") // TODO: hide tracerAgentFlag := flag.Bool("tracer-agent", false, "(An internal flag. Do not use manually.)") // TODO: hide + memNSEnterPid := flag.Int("mem-nsenter-pid", -1, "(An internal flag. Do not use manually.)") // TODO: hide handleC2cEnable := flag.Bool("handle-c2c-connections", false, "Handle connections between containers") tracerEnable := flag.Bool("tracer", false, "Enable connection tracer") multinodeEnable := flag.Bool("multinode", false, "Enable multinode communication") @@ -85,6 +86,14 @@ func main() { os.Exit(0) } + if *memNSEnterPid > 0 { + logrus.SetOutput(os.Stdout) + if err := bypass4netns.OpenMemWithNSEnterAgent(uint32(*memNSEnterPid)); err != nil { + logrus.Fatal(err) + } + os.Exit(0) + } + if logFilePath != "" { logFile, err := os.Create(logFilePath) if err != nil { diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 409afd4..2914544 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -4,11 +4,15 @@ package bypass4netns // The code is licensed under Apache-2.0 License import ( + "bytes" gocontext "context" "encoding/json" "errors" "fmt" "net" + "os" + "os/exec" + "strconv" "syscall" "time" @@ -19,6 +23,7 @@ import ( "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/iproute2" "github.com/rootless-containers/bypass4netns/pkg/bypass4netns/nonbypassable" 
"github.com/rootless-containers/bypass4netns/pkg/bypass4netns/tracer" + "github.com/rootless-containers/bypass4netns/pkg/util" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -75,14 +80,13 @@ func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) { } // readProcMem read data from memory of specified pid process at the spcified offset. -func readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { +func (h *notifHandler) readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { buffer := make([]byte, len) // PATH_MAX - memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDONLY, 0o777) + memfd, err := h.openMem(pid) if err != nil { return nil, err } - defer unix.Close(memfd) size, err := unix.Pread(memfd, buffer, int64(offset)) if err != nil { @@ -93,12 +97,11 @@ func readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { } // writeProcMem writes data to memory of specified pid process at the specified offset. -func writeProcMem(pid uint32, offset uint64, buf []byte) error { - memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_WRONLY, 0o777) +func (h *notifHandler) writeProcMem(pid uint32, offset uint64, buf []byte) error { + memfd, err := h.openMem(pid) if err != nil { return err } - defer unix.Close(memfd) size, err := unix.Pwrite(memfd, buf, int64(offset)) if err != nil { @@ -111,6 +114,116 @@ func writeProcMem(pid uint32, offset uint64, buf []byte) error { return nil } +func (h *notifHandler) openMem(pid uint32) (int, error) { + if memfd, ok := h.memfds[pid]; ok { + return memfd, nil + } + memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDWR, 0o777) + if err != nil { + logrus.WithField("pid", pid).Warn("failed to open mem due to permission error. 
retrying with agent.") + newMemfd, err := openMemWithNSEnter(pid) + if err != nil { + return 0, fmt.Errorf("failed to open mem with agent (pid=%d)", pid) + } + logrus.WithField("pid", pid).Info("succeeded to open mem with agent. continue to process") + memfd = newMemfd + } + h.memfds[pid] = memfd + + return memfd, nil +} + +func openMemWithNSEnter(pid uint32) (int, error) { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + return 0, err + } + + // configure timeout + timeout := &syscall.Timeval{ + Sec: 0, + Usec: 500 * 1000, + } + err = syscall.SetsockoptTimeval(fds[0], syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, timeout) + if err != nil { + return 0, fmt.Errorf("failed to set receive timeout") + } + err = syscall.SetsockoptTimeval(fds[1], syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, timeout) + if err != nil { + return 0, fmt.Errorf("failed to set send timeout") + } + + fd1File := os.NewFile(uintptr(fds[0]), "") + defer fd1File.Close() + fd1Conn, err := net.FileConn(fd1File) + if err != nil { + return 0, err + } + _ = fd1Conn + + selfExe, err := os.Executable() + if err != nil { + return 0, err + } + nsenter, err := exec.LookPath("nsenter") + if err != nil { + return 0, err + } + nsenterFlags := []string{ + "-t", strconv.Itoa(int(pid)), + "-F", + } + selfPid := os.Getpid() + ok, err := util.SameUserNS(int(pid), selfPid) + if err != nil { + return 0, fmt.Errorf("failed to check sameUserNS(%d, %d)", pid, selfPid) + } + if !ok { + nsenterFlags = append(nsenterFlags, "-U", "--preserve-credentials") + } + nsenterFlags = append(nsenterFlags, "--", selfExe, fmt.Sprintf("--mem-nsenter-pid=%d", pid)) + cmd := exec.CommandContext(gocontext.TODO(), nsenter, nsenterFlags...) 
+ cmd.ExtraFiles = []*os.File{os.NewFile(uintptr(fds[1]), "")} + stdout := bytes.Buffer{} + cmd.Stdout = &stdout + err = cmd.Start() + if err != nil { + return 0, fmt.Errorf("failed to exec mem open agent %q", err) + } + memfd, recvMsgs, err := util.RecvMsg(fd1Conn) + if err != nil { + logrus.Infof("stdout=%q", stdout.String()) + return 0, fmt.Errorf("failed to receive message") + } + logrus.Debugf("recvMsgs=%s", string(recvMsgs)) + err = cmd.Wait() + if err != nil { + return 0, err + } + + return memfd, nil +} + +func OpenMemWithNSEnterAgent(pid uint32) error { + // fd 3 should be passed socket pair + fdFile := os.NewFile(uintptr(3), "") + defer fdFile.Close() + fdConn, err := net.FileConn(fdFile) + if err != nil { + logrus.WithError(err).Fatal("failed to open conn") + } + memPath := fmt.Sprintf("/proc/%d/mem", pid) + memfd, err := unix.Open(memPath, unix.O_RDWR, 0o777) + if err != nil { + logrus.WithError(err).Fatalf("failed to open %s", memPath) + } + err = util.SendMsg(fdConn, memfd, []byte(fmt.Sprintf("opened %s", memPath))) + if err != nil { + logrus.WithError(err).Fatal("failed to send message") + } + return nil +} + func handleNewMessage(sockfd int) (uintptr, *specs.ContainerProcessState, error) { const maxNameLen = 4096 stateBuf := make([]byte, maxNameLen) @@ -203,8 +316,8 @@ func getSocketArgs(sockfd int) (int, int, int, error) { return sock_domain, sock_type, sock_protocol, nil } -func readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockaddr, error) { - buf, err := readProcMem(pid, offset, addrlen) +func (h *notifHandler) readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockaddr, error) { + buf, err := h.readProcMem(pid, offset, addrlen) if err != nil { return nil, fmt.Errorf("failed readProcMem pid %v offset 0x%x: %s", pid, offset, err) } @@ -297,6 +410,10 @@ func (h *notifHandler) handleReq(ctx *context) { // cleanup sockets when the process exit. 
if syscallName == "_exit" || syscallName == "exit_group" { delete(h.processes, ctx.req.Pid) + if memfd, ok := h.memfds[ctx.req.Pid]; ok { + syscall.Close(memfd) + delete(h.memfds, ctx.req.Pid) + } logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid}).Debugf("process is removed") return } @@ -319,7 +436,7 @@ func (h *notifHandler) handleReq(ctx *context) { } if syscallName == "getpeername" { - sock.handleSysGetpeername(ctx) + sock.handleSysGetpeername(h, ctx) } switch sock.state { @@ -335,7 +452,7 @@ func (h *notifHandler) handleReq(ctx *context) { case "connect": sock.handleSysConnect(h, ctx) case "setsockopt": - sock.handleSysSetsockopt(ctx) + sock.handleSysSetsockopt(h, ctx) case "fcntl": sock.handleSysFcntl(ctx) case "getpeername": @@ -484,6 +601,9 @@ type notifHandler struct { containerInterfaces map[string]containerInterface c2cConnections *C2CConnectionHandleConfig multinode *MultinodeConfig + + // cache /proc//mem's fd to reduce latency. key is pid, value is fd + memfds map[uint32]int } type containerInterface struct { @@ -498,6 +618,7 @@ func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState state: state, forwardingPorts: map[int]ForwardPortMapping{}, processes: map[uint32]*processStatus{}, + memfds: map[uint32]int{}, } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index c9c5839..1789340 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -97,12 +97,12 @@ func newSocketStatus(pid uint32, sockfd int, sockDomain, sockType, sockProto int } } -func (ss *socketStatus) handleSysSetsockopt(ctx *context) { +func (ss *socketStatus) handleSysSetsockopt(handler *notifHandler, ctx *context) { ss.logger.Debug("handle setsockopt") level := ctx.req.Data.Args[1] optname := ctx.req.Data.Args[2] optlen := ctx.req.Data.Args[4] - optval, err := readProcMem(ctx.req.Pid, 
ctx.req.Data.Args[3], optlen) + optval, err := handler.readProcMem(ctx.req.Pid, ctx.req.Data.Args[3], optlen) if err != nil { ss.logger.Errorf("setsockopt readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) } @@ -138,7 +138,7 @@ func (ss *socketStatus) handleSysFcntl(ctx *context) { } func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { - destAddr, err := readSockaddrFromProcess(ss.pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + destAddr, err := handler.readSockaddrFromProcess(ss.pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) if err != nil { ss.logger.Errorf("failed to read sockaddr from process: %q", err) return @@ -255,7 +255,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { binary.BigEndian.PutUint16(p, uint16(fwdPort.HostPort)) // writing host port at sock_addr's port offset // TODO: should we return dummy value when getpeername(2) is called? - err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+2, p) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+2, p) if err != nil { ss.logger.Errorf("failed to rewrite destination port: %q", err) ss.state = Error @@ -270,10 +270,10 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { switch destAddr.Family { case syscall.AF_INET: newDestAddr = newDestAddr.To4() - err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, newDestAddr[0:4]) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+4, newDestAddr[0:4]) case syscall.AF_INET6: newDestAddr = newDestAddr.To16() - err = writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, newDestAddr[0:16]) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1]+8, newDestAddr[0:16]) default: ss.logger.Errorf("unexpected destination address family %d", destAddr.Family) ss.state = Error @@ -293,7 +293,7 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { } func (ss *socketStatus) handleSysBind(handler *notifHandler, ctx *context) { - 
sa, err := readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) + sa, err := handler.readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) if err != nil { ss.logger.Errorf("failed to read sockaddr from process: %q", err) ss.state = NotBypassable @@ -378,7 +378,7 @@ func (ss *socketStatus) handleSysBind(handler *notifHandler, ctx *context) { ctx.resp.Flags &= (^uint32(SeccompUserNotifFlagContinue)) } -func (ss *socketStatus) handleSysGetpeername(ctx *context) { +func (ss *socketStatus) handleSysGetpeername(handler *notifHandler, ctx *context) { if ss.addr == nil { return } @@ -389,7 +389,7 @@ func (ss *socketStatus) handleSysGetpeername(ctx *context) { return } - err = writeProcMem(ss.pid, ctx.req.Data.Args[1], buf) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[1], buf) if err != nil { ss.logger.WithError(err).Errorf("failed to write address %s", ss.addr) return @@ -397,7 +397,7 @@ func (ss *socketStatus) handleSysGetpeername(ctx *context) { bufLen := make([]byte, 4) binary.LittleEndian.PutUint32(bufLen, uint32(len(buf))) - err = writeProcMem(ss.pid, ctx.req.Data.Args[2], bufLen) + err = handler.writeProcMem(ss.pid, ctx.req.Data.Args[2], bufLen) if err != nil { ss.logger.WithError(err).Errorf("failed to write address length %d", len(buf)) return diff --git a/pkg/util/util.go b/pkg/util/util.go index 3f80de4..2aaf4af 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -2,7 +2,9 @@ package util import ( "fmt" + "net" "os" + "syscall" ) // shrinkID shrinks id to short(12 chars) id @@ -30,3 +32,56 @@ func SameUserNS(pidX, pidY int) (bool, error) { } return nsXResolved == nsYResolved, nil } + +// copied from https://github.com/pfnet-research/meta-fuse-csi-plugin/blob/437dbbbbf16e5b02f9a508e3403d044b0a9dff89/pkg/util/fdchannel.go#L29 +// which is licensed under apache 2.0 +func SendMsg(via net.Conn, fd int, msg []byte) error { + conn, ok := via.(*net.UnixConn) + if !ok { + return fmt.Errorf("failed to 
cast via to *net.UnixConn") + } + connf, err := conn.File() + if err != nil { + return err + } + socket := int(connf.Fd()) + defer connf.Close() + + rights := syscall.UnixRights(fd) + + return syscall.Sendmsg(socket, msg, rights, nil, 0) +} + +func RecvMsg(via net.Conn) (int, []byte, error) { + conn, ok := via.(*net.UnixConn) + if !ok { + return 0, nil, fmt.Errorf("failed to cast via to *net.UnixConn") + } + connf, err := conn.File() + if err != nil { + return 0, nil, err + } + socket := int(connf.Fd()) + defer connf.Close() + + buf := make([]byte, syscall.CmsgSpace(4)) + b := make([]byte, 500) + //nolint:dogsled + n, _, _, _, err := syscall.Recvmsg(socket, b, buf, 0) + if err != nil { + return 0, nil, err + } + + var msgs []syscall.SocketControlMessage + msgs, err = syscall.ParseSocketControlMessage(buf) + if err != nil { + return 0, nil, err + } + + fds, err := syscall.ParseUnixRights(&msgs[0]) + if err != nil { + return 0, nil, err + } + + return fds[0], b[:n], err +} From 0fd3f7d3c2ef6daa3391cbd0900fee5edf4424bb Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sun, 26 Nov 2023 13:27:38 +0000 Subject: [PATCH 27/55] add PostgreSQL benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 4 ++ benchmark/postgres.sh | 97 +++++++++++++++++++++++++++++++++ benchmark/postgres_multinode.sh | 58 ++++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100755 benchmark/postgres.sh create mode 100755 benchmark/postgres_multinode.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3391743..a2c2891 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -54,6 +54,10 @@ jobs: run: ./test/multinode.sh - name: benchmark (redis) run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/redis.sh + - name: benchmark (postgresql) + run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/postgres.sh - name: multinode benchmark (redis) 
run: ./benchmark/redis_multinode.sh + - name: multinode benchmark (postgresql) + run: ./benchmark/postgres_multinode.sh diff --git a/benchmark/postgres.sh b/benchmark/postgres.sh new file mode 100755 index 0000000..5457336 --- /dev/null +++ b/benchmark/postgres.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +set -eu -o pipefail + +POSTGRES_VERSION=16.1 +POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" + +source ~/.profile +cd $(dirname $0) +. ../util.sh + +nerdctl pull --quiet $POSTGRES_IMAGE + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f psql-server + nerdctl rm -f psql-client + set -ex + + nerdctl run -d --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(nerdctl exec psql-server hostname -i) + PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') + NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 5432; do sleep 1; done" + nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres + + nerdctl rm -f psql-server + nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f psql-server + nerdctl rm -f psql-client + set -ex + + nerdctl run -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(hostname -I | awk '{print $1}') + sleep 5 + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres + + nerdctl rm -f psql-server + nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ 
bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(hostname -I | awk '{print $1}') + PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') + NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 15432; do sleep 1; done" + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres + + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: postgres client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl --user reset-failed + set -ex + + HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(nerdctl exec psql-server hostname -i) + sleep 5 + nerdctl exec psql-client pgbench -h $SERVER_IP -p 5432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 5432 -U postgres -s 10 -t 1000 postgres + + nerdctl rm -f psql-server + nerdctl rm -f psql-client + systemctl 
--user stop run-bypass4netnsd +) diff --git a/benchmark/postgres_multinode.sh b/benchmark/postgres_multinode.sh new file mode 100755 index 0000000..53b5be6 --- /dev/null +++ b/benchmark/postgres_multinode.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +cd $(dirname $0) +. ../util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f psql-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +POSTGRES_VERSION=16.1 +POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" + +set -eux -o pipefail + +NAME="test" exec_lxc nerdctl pull --quiet $POSTGRES_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name psql-server -e POSTGRES_PASSWORD=pass -d $POSTGRES_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name psql-client -e PGPASSWORD=pass -d $POSTGRES_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -i postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres + + NAME="test" exec_lxc nerdctl rm -f psql-server + NAME="test2" exec_lxc nerdctl rm -f psql-client +) + +echo "===== Benchmark: 
postgresql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec psql-server hostname -i) + sleep 5 + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres + + NAME="test" exec_lxc nerdctl rm -f psql-server + NAME="test2" exec_lxc nerdctl rm -f psql-client +) \ No newline at end of file From 442ba536fbe71046fa2108fad2de949901d9c1ca Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Mon, 27 Nov 2023 04:39:45 +0000 Subject: [PATCH 28/55] benchmark/redis: output logs as csv Signed-off-by: Naoki MATSUMOTO --- benchmark/redis.sh | 8 ++++---- benchmark/redis_multinode.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmark/redis.sh b/benchmark/redis.sh index f4b7545..01993d3 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -20,7 +20,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name redis-server 
"${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(nerdctl exec redis-server hostname -i) - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv nerdctl rm -f redis-server nerdctl rm -f redis-client ) @@ -35,7 +35,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv nerdctl rm -f redis-server nerdctl rm -f redis-client ) @@ -54,7 +54,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv nerdctl rm -f redis-server nerdctl rm -f redis-client @@ -77,7 +77,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity SERVER_IP=$(nerdctl exec redis-server hostname -i) # without 'sleep 1', benchmark is not performed.(race condition?) 
- nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP -p 6379" + nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP -p 6379 --csv" nerdctl rm -f redis-server nerdctl rm -f redis-client diff --git a/benchmark/redis_multinode.sh b/benchmark/redis_multinode.sh index 5c4d683..45503c6 100755 --- a/benchmark/redis_multinode.sh +++ b/benchmark/redis_multinode.sh @@ -33,7 +33,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) w NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR - NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv NAME="test" exec_lxc nerdctl rm -f redis-server NAME="test2" exec_lxc nerdctl rm -f redis-client @@ -48,7 +48,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE" SERVER_IP=$(NAME="test" exec_lxc nerdctl exec redis-server hostname -i) NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" - NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP" + NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" NAME="test" exec_lxc nerdctl rm -f redis-server 
NAME="test2" exec_lxc nerdctl rm -f redis-client From 3ba037da52d72295b9fa52a2853353f2c0f50518 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Mon, 27 Nov 2023 07:23:09 +0000 Subject: [PATCH 29/55] add iperf3 benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 4 ++ benchmark/iperf3.sh | 122 ++++++++++++++++++++++++++++++++++ benchmark/iperf3_multinode.sh | 62 +++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100755 benchmark/iperf3.sh create mode 100755 benchmark/iperf3_multinode.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a2c2891..67d0c9d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -52,10 +52,14 @@ jobs: run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/test/run_test.sh - name: run multinode test run: ./test/multinode.sh + - name: benchmark (iperf3) + run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/iperf3.sh - name: benchmark (redis) run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/redis.sh - name: benchmark (postgresql) run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/postgres.sh + - name: multinode benchmark (iperf3) + run: ./benchmark/iperf3_multinode.sh - name: multinode benchmark (redis) run: ./benchmark/redis_multinode.sh - name: multinode benchmark (postgresql) diff --git a/benchmark/iperf3.sh b/benchmark/iperf3.sh new file mode 100755 index 0000000..526b6c0 --- /dev/null +++ b/benchmark/iperf3.sh @@ -0,0 +1,122 @@ +#!/bin/bash + +set -eu -o pipefail + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +source ~/.profile + +nerdctl pull --quiet $ALPINE_IMAGE + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + 
systemctl --user reset-failed + set -ex + + nerdctl run -d --name iperf3-server $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(nerdctl exec iperf3-server hostname -i) + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed + set -ex + + nerdctl run -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(hostname -I | awk '{print $1}') + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec 
iperf3-server apk add --no-cache iperf3 + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(hostname -I | awk '{print $1}') + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-server apk add --no-cache iperf3 + nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity + nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(nerdctl exec iperf3-server hostname -i) + sleep 1 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + + nerdctl rm -f iperf3-server + nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) diff --git a/benchmark/iperf3_multinode.sh b/benchmark/iperf3_multinode.sh new file mode 100755 index 0000000..786451d --- /dev/null +++ b/benchmark/iperf3_multinode.sh @@ 
-0,0 +1,62 @@ +#!/bin/bash + +cd $(dirname $0) +. ../util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f iperf3-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" + +set -eux -o pipefail + +NAME="test" exec_lxc nerdctl pull --quiet $ALPINE_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-client $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 + + NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 + + NAME="test" exec_lxc nerdctl rm -f iperf3-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f iperf3-client +) + +echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) with 
multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 5202:5201 --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-client $ALPINE_IMAGE sleep infinity" + NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 + + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec iperf3-server hostname -i) + NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + + NAME="test" exec_lxc nerdctl rm -f iperf3-server + NAME="test2" exec_lxc nerdctl rm -f iperf3-client +) \ No newline at end of file From 9ccca762e2a066f5ba036f5115976e92c2629a13 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Mon, 27 Nov 2023 07:56:27 +0000 Subject: [PATCH 30/55] run test and benchmarks parallel Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 150 ++++++++++++++++++++++++++++-------- benchmark/iperf3.sh | 3 + benchmark/iperf3_host.sh | 72 +++++++++++++++++ benchmark/postgres.sh | 4 + benchmark/redis.sh | 6 +- export_lxc_image.sh | 9 +++ launch_test_lxc.sh | 5 +- setup_lxd.sh | 15 ++++ test/init_test.sh | 6 -- test/run_test.sh | 81 +++---------------- 10 
files changed, 239 insertions(+), 112 deletions(-) create mode 100755 benchmark/iperf3_host.sh create mode 100755 export_lxc_image.sh create mode 100755 setup_lxd.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 67d0c9d..78cafb5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -24,44 +24,128 @@ jobs: with: version: v1.49.0 args: --verbose - ubuntu-2204-on-lxc: - name: Ubuntu 22.04 on LXC + + create-lxc-image: + name: create-lxc-image runs-on: ubuntu-22.04 - timeout-minutes: 20 + timeout-minutes: 10 steps: - uses: actions/checkout@v3.0.2 - - name: install lxd (v5.19) - run: sudo snap remove --purge lxd && sudo snap install lxd --revision=26093 - - run: sudo modprobe vxlan - - name: configure lxd - run: cat test/lxd.yaml | sudo lxd init --preseed && sudo sysctl -w net.ipv4.ip_forward=1 - # thanks to https://andreas.scherbaum.la/post/2023-01-18_fix-lxc-network-issues-in-ubuntu-22.04/ - - name: Disable Docker Firewall 1 - run: sudo iptables -I DOCKER-USER -i lxdbr0 -o eth0 -j ACCEPT - - name: Disable Docker Firewall 2 - run: sudo iptables -I DOCKER-USER -o lxdbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT - - name: Disable Docker Firewall 3 - run: sudo iptables -F FORWARD && sudo iptables -P FORWARD ACCEPT - #- name: debug - # run: ./debug.sh + + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + lookup-only: true + + - name: setup lxd (v5.19) + id: s1 + if: steps.cache-restore.outputs.cache-hit != 'true' + run: ./setup_lxd.sh + - name: launch lxc container + id: s6 + if: steps.s1.conclusion == 'success' run: ./launch_test_lxc.sh + - name: install dependencies and build + id: s7 + if: steps.s6.conclusion == 'success' run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh - - name: run tests - run: sudo lxc exec test -- sudo --login --user ubuntu 
/home/ubuntu/bypass4netns/test/run_test.sh - - name: run multinode test - run: ./test/multinode.sh - - name: benchmark (iperf3) - run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/iperf3.sh - - name: benchmark (redis) - run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/redis.sh - - name: benchmark (postgresql) - run: sudo lxc exec test -- sudo --login --user ubuntu /home/ubuntu/bypass4netns/benchmark/postgres.sh - - name: multinode benchmark (iperf3) - run: ./benchmark/iperf3_multinode.sh - - name: multinode benchmark (redis) - run: ./benchmark/redis_multinode.sh - - name: multinode benchmark (postgresql) - run: ./benchmark/postgres_multinode.sh + - name: export image + id: s8 + if: steps.s7.conclusion == 'success' + run: ./export_lxc_image.sh test + + - uses: actions/cache/save@v3 + id: s11 + if: steps.s8.conclusion == 'success' + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + + test: + runs-on: ubuntu-22.04 + needs: create-lxc-image + timeout-minutes: 20 + steps: + - uses: actions/checkout@v3.0.2 + - name: setup lxd (v5.19) + run: ./setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./launch_test_lxc.sh test-export + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service + - name: run test + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh" + # some source codes may be updated. 
re-export new image. + - name: export image + run: sudo lxc image alias delete test-export && rm -f /tmp/test-image.tar.zst && ./export_lxc_image.sh test + - uses: actions/cache/save@v3 + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + #- name: debug + # run: ./debug.sh + + benchmark: + runs-on: ubuntu-22.04 + needs: test + timeout-minutes: 20 + strategy: + matrix: + script: ["iperf3_host.sh", "iperf3.sh", "postgres.sh", "redis.sh"] + steps: + - uses: actions/checkout@v3.0.2 + - name: setup lxd (v5.19) + run: ./setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./launch_test_lxc.sh test-export + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service + - name: run benchmark (${{ matrix.script }}) + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}" + + benchmark-multinode: + runs-on: ubuntu-22.04 + needs: test + timeout-minutes: 20 + strategy: + matrix: + script: ["iperf3_multinode.sh", "postgres_multinode.sh", "redis_multinode.sh"] + steps: + - uses: actions/checkout@v3.0.2 + - name: setup lxd (v5.19) + run: ./setup_lxd.sh + - uses: actions/cache/restore@v3 + id: cache-restore + with: + key: lxc-image-${{ github.sha }} + path: /tmp/test-image.tar.zst + fail-on-cache-miss: true + - name: load lxc image + run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export + - name: launch lxc container + run: ./launch_test_lxc.sh test-export + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start 
containerd-fuse-overlayfs.service + - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service + - name: run benchmark (${{ matrix.script }}) + run: ./benchmark/${{ matrix.script }} \ No newline at end of file diff --git a/benchmark/iperf3.sh b/benchmark/iperf3.sh index 526b6c0..cd203f5 100755 --- a/benchmark/iperf3.sh +++ b/benchmark/iperf3.sh @@ -97,10 +97,12 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) wi nerdctl rm -f iperf3-client systemctl --user stop iperf3-server systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service systemctl --user reset-failed set -ex HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity @@ -118,5 +120,6 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) wi nerdctl rm -f iperf3-client systemctl --user stop iperf3-server systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service systemctl --user reset-failed ) diff --git a/benchmark/iperf3_host.sh b/benchmark/iperf3_host.sh new file mode 100755 index 0000000..dd1a80b --- /dev/null +++ b/benchmark/iperf3_host.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +set -eu -o pipefail + +source ~/.profile + +ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" +nerdctl pull --quiet "${ALPINE_IMAGE}" + +HOST_IP=$(hostname -I | awk '{print $1}') +systemd-run --user --unit run-iperf3 iperf3 -s + +echo "===== Benchmark: netns -> host With bypass4netns =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-bypass4netnsd + systemctl 
--user reset-failed + set -ex + + # start bypass4netnsd for nerdctl integration + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + sleep 1 + nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $HOST_IP + nerdctl rm -f test +) + +echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" +( + set +e + nerdctl rm -f test + set -ex + + nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + nerdctl exec test iperf3 -c $HOST_IP + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns With bypass4netns =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns + systemctl --user reset-failed + set -ex + + nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 + sleep 1 # waiting `iperf3 -s -4` becomes ready + iperf3 -c $HOST_IP -p 8080 + nerdctl rm -f test +) + +echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" +( + set +e + nerdctl rm -f test + systemctl --user stop run-iperf3-netns2 + systemctl --user reset-failed + set -ex + + nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity + nerdctl exec test apk add --no-cache iperf3 + systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 + sleep 1 + iperf3 -c $HOST_IP -p 8080 + nerdctl rm -f test +) diff --git a/benchmark/postgres.sh b/benchmark/postgres.sh index 5457336..11cab6f 100755 --- a/benchmark/postgres.sh +++ b/benchmark/postgres.sh @@ -78,10 +78,12 @@ echo "===== Benchmark: postgres client(w/ bypass4netns) server(w/ bypass4netns) systemctl --user stop run-bypass4netnsd nerdctl rm -f psql-server nerdctl rm -f psql-client + systemctl --user stop 
etcd.service systemctl --user reset-failed set -ex HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE @@ -94,4 +96,6 @@ echo "===== Benchmark: postgres client(w/ bypass4netns) server(w/ bypass4netns) nerdctl rm -f psql-server nerdctl rm -f psql-client systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed ) diff --git a/benchmark/redis.sh b/benchmark/redis.sh index 01993d3..ef41cc4 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -64,13 +64,15 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" ( set +e - systemctl --user stop run-bypass4netnsd nerdctl rm -f redis-server nerdctl rm -f redis-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service systemctl --user reset-failed set -ex HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE @@ -82,4 +84,6 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit nerdctl rm -f redis-server nerdctl rm -f redis-client systemctl --user stop run-bypass4netnsd + systemctl --user stop 
etcd.service + systemctl --user reset-failed ) \ No newline at end of file diff --git a/export_lxc_image.sh b/export_lxc_image.sh new file mode 100755 index 0000000..0467b83 --- /dev/null +++ b/export_lxc_image.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eux -o pipefail + +IMAGE_NAME=$1 + +sudo lxc snapshot $IMAGE_NAME snp0 +sudo lxc publish $IMAGE_NAME/snp0 --alias $IMAGE_NAME-export --compression zstd +sudo lxc image export $IMAGE_NAME-export /tmp/$IMAGE_NAME-image diff --git a/launch_test_lxc.sh b/launch_test_lxc.sh index 4918361..30f7141 100755 --- a/launch_test_lxc.sh +++ b/launch_test_lxc.sh @@ -1,9 +1,12 @@ #!/bin/bash +set -ux -o pipefail +IMAGE=${1:-"images:ubuntu/22.04"} + cd $(dirname $0) # lxd init --auto --storage-backend=btrfs -sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test +sudo lxc launch -c security.nesting=true $IMAGE test sudo lxc config device add test share disk source=$(pwd) path=/host sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" # let user services running diff --git a/setup_lxd.sh b/setup_lxd.sh new file mode 100755 index 0000000..6c77143 --- /dev/null +++ b/setup_lxd.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -eux -o pipefail + +cd $(dirname $0) + +#sudo snap remove --purge lxd && sudo snap install lxd --revision=26093 +sudo modprobe vxlan +cat test/lxd.yaml | sudo lxd init --preseed +sudo sysctl -w net.ipv4.ip_forward=1 +#https://andreas.scherbaum.la/post/2023-01-18_fix-lxc-network-issues-in-ubuntu-22.04/ +sudo iptables -I DOCKER-USER -i lxdbr0 -o eth0 -j ACCEPT +sudo iptables -I DOCKER-USER -o lxdbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +sudo iptables -F FORWARD +sudo iptables -P FORWARD ACCEPT diff --git a/test/init_test.sh b/test/init_test.sh index 9693dbf..576b174 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -23,8 +23,6 @@ echo "===== Prepare =====" sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session 
iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd jq tcpdump ethtool sudo systemctl stop etcd sudo systemctl disable etcd - HOST_IP=$(hostname -I | sed 's/ //') - systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemctl --user start dbus @@ -61,8 +59,4 @@ EOF make sudo rm -f /usr/local/bin/bypass4netns* sudo make install - - hostname -I | awk '{print $1}' | tee /tmp/host_ip - ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json - ) diff --git a/test/run_test.sh b/test/run_test.sh index 7685d58..daedcb5 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -8,12 +8,9 @@ ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" nerdctl pull --quiet "${ALPINE_IMAGE}" SCRIPT_DIR=$(cd $(dirname $0); pwd) -cd $SCRIPT_DIR - set +u - if [ ! -v 1 ]; then - echo "COPY" + echo "updating source code" rm -rf ~/bypass4netns sudo cp -r /host ~/bypass4netns sudo chown -R ubuntu:ubuntu ~/bypass4netns @@ -21,15 +18,12 @@ if [ ! -v 1 ]; then exec $0 "FORK" exit 0 fi - -set -u - -echo "THIS IS FORK" - +echo "source code is updated" cd ~/bypass4netns rm -f bypass4netns bypass4netnsd make sudo make install +set -u cd $SCRIPT_DIR set +e @@ -39,6 +33,8 @@ sleep 1 set -e systemd-run --user --unit run-iperf3 iperf3 -s +HOST_IP=$(hostname -I | awk '{print $1}') +~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json echo "===== '--ignore' option test =====" ( @@ -50,7 +46,7 @@ echo "===== '--ignore' option test =====" systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8,192.168.6.0/24" --debug nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test "${ALPINE_IMAGE}" sleep infinity nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c $(cat /tmp/host_ip) -t 1 + nerdctl exec test iperf3 -c $HOST_IP -t 1 # TODO: this check is dirty. we want better method to check the connect(2) is ignored. 
journalctl --user -u run-bypass4netns.service | grep "is not bypassed" nerdctl rm -f test @@ -152,14 +148,16 @@ echo "===== tracer test (enabled) =====" echo "===== multinode test (single node) ====" ( set +e - systemctl --user stop run-bypass4netnsd nerdctl rm -f test1 nerdctl rm -f test2 nerdctl network rm net-2 + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service systemctl --user reset-failed set -ex HOST_IP=$(hostname -I | sed 's/ //') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP --debug sleep 1 nerdctl run --label nerdctl/bypass4netns=true -d -p 8080:5201 --name test1 "${ALPINE_IMAGE}" sleep infinity @@ -175,65 +173,6 @@ echo "===== multinode test (single node) ====" nerdctl rm -f test2 nerdctl network rm net-2 systemctl --user stop run-bypass4netnsd -) - -echo "===== Benchmark: netns -> host With bypass4netns =====" -( - set +e - nerdctl rm -f test - systemctl --user stop run-bypass4netnsd - systemctl --user reset-failed - set -ex - - # start bypass4netnsd for nerdctl integration - systemd-run --user --unit run-bypass4netnsd bypass4netnsd - sleep 1 - nerdctl run --label nerdctl/bypass4netns=true -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test -) - -echo "===== Benchmark: netns -> host Without bypass4netns (for comparison) =====" -( - set +e - nerdctl rm -f test - set -ex - - nerdctl run -d --name test "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - nerdctl exec test iperf3 -c "$(cat /tmp/host_ip)" - nerdctl rm -f test -) - -echo "===== Benchmark: host -> netns With bypass4netns =====" -( - set +e - nerdctl rm -f test - systemctl 
--user stop run-iperf3-netns + systemctl --user stop etcd.service systemctl --user reset-failed - set -ex - - nerdctl run --label nerdctl/bypass4netns=true -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns nerdctl exec test iperf3 -s -4 - sleep 1 # waiting `iperf3 -s -4` becomes ready - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test ) - -echo "===== Benchmark: host -> netns Without bypass4netns (for comparison) =====" -( - set +e - nerdctl rm -f test - systemctl --user stop run-iperf3-netns2 - systemctl --user reset-failed - set -ex - - nerdctl run -d --name test -p 8080:5201 "${ALPINE_IMAGE}" sleep infinity - nerdctl exec test apk add --no-cache iperf3 - systemd-run --user --unit run-iperf3-netns2 nerdctl exec test iperf3 -s -4 - sleep 1 - iperf3 -c "$(cat /tmp/host_ip)" -p 8080 - nerdctl rm -f test -) \ No newline at end of file From ebce700a59f4b5a73afdd681d2a194c871c00a57 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 09:15:18 +0000 Subject: [PATCH 31/55] plot redis benchmark result Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 13 ++++++++-- benchmark/.gitignore | 2 ++ benchmark/redis.sh | 20 +++++++++++---- benchmark/redis_plot.py | 50 +++++++++++++++++++++++++++++++++++++ test/init_test.sh | 3 ++- 5 files changed, 80 insertions(+), 8 deletions(-) create mode 100644 benchmark/.gitignore create mode 100644 benchmark/redis_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 78cafb5..a471076 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3_host.sh", "iperf3.sh", "postgres.sh", "redis.sh"] + script: ["iperf3_host", "iperf3", "postgres", "redis"] steps: - uses: actions/checkout@v3.0.2 - name: setup lxd (v5.19) @@ -122,7 +122,16 @@ jobs: - run: sudo lxc exec test -- sudo 
--login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run benchmark (${{ matrix.script }}) - run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}" + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" + - name: upload plot + id: get_plot + if: matrix.script == 'redis' + run: sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.png /tmp/${{ matrix.script }}.png + - uses: actions/upload-artifact@v3 + if: steps.get_plot.conclusion == 'success' + with: + path: /tmp/${{ matrix.script }}.png + benchmark-multinode: runs-on: ubuntu-22.04 diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..8ec9dfd --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,2 @@ +*.csv +*.png \ No newline at end of file diff --git a/benchmark/redis.sh b/benchmark/redis.sh index ef41cc4..73c662a 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -1,8 +1,8 @@ #!/bin/bash - - set -eu -o pipefail +cd $(dirname $0) + REDIS_VERSION=7.2.3 REDIS_IMAGE="redis:${REDIS_VERSION}" @@ -20,7 +20,9 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(nerdctl exec redis-server hostname -i) - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-wo-b4ns-direct.csv + cat redis-wo-b4ns-direct.csv + nerdctl rm -f redis-server nerdctl rm -f redis-client ) @@ -35,7 +37,9 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d -p 6380:6379 
--name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-wo-b4ns-host.csv + cat redis-wo-b4ns-host.csv + nerdctl rm -f redis-server nerdctl rm -f redis-client ) @@ -54,7 +58,8 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-w-b4ns.csv + cat redis-w-b4ns.csv nerdctl rm -f redis-server nerdctl rm -f redis-client @@ -86,4 +91,9 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit systemctl --user stop run-bypass4netnsd systemctl --user stop etcd.service systemctl --user reset-failed +) + +echo "===== Visualize benchmark: redis =====" +( + python3 redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-w-b4ns.csv redis.png ) \ No newline at end of file diff --git a/benchmark/redis_plot.py b/benchmark/redis_plot.py new file mode 100644 index 0000000..3de9542 --- /dev/null +++ b/benchmark/redis_plot.py @@ -0,0 +1,50 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + reader = csv.reader(f) + next(reader, None) + for row in reader: + data[row[0]] = float(row[1]) + return data + +BAR_WIDTH=0.25 + + +data_wo_b4ns_direct = load_data(sys.argv[1]) +data_wo_b4ns_host = load_data(sys.argv[2]) +data_w_b4ns = load_data(sys.argv[3]) + +labels_for_data=['PING_INLINE', 
'PING_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH (needed to benchmark LRANGE)', 'LRANGE_100 (first 100 elements)', 'LRANGE_300 (first 300 elements)', 'LRANGE_500 (first 500 elements)', 'LRANGE_600 (first 600 elements)', 'MSET (10 keys)', 'XADD'] + +value_wo_b4ns_direct = [] +value_wo_b4ns_host = [] +value_w_b4ns = [] +for l in labels_for_data: + value_wo_b4ns_direct.append(data_wo_b4ns_direct[l]) + value_wo_b4ns_host.append(data_wo_b4ns_host[l]) + value_w_b4ns.append(data_w_b4ns[l]) + +labels=['PING\n_INLINE', 'PING\n_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH', 'LRANGE\n_100', 'LRANGE\n_300', 'LRANGE\n_500', 'LRANGE\n_600', 'MSET\n(10 keys)', 'XADD'] +print(value_wo_b4ns_direct) +print(value_wo_b4ns_host) +print(value_w_b4ns) + + +plt.rcParams["figure.figsize"] = (20,4) + +plt.ylabel("Request / seconds") +plt.bar([x for x in range(0, len(labels))], value_wo_b4ns_direct, align="edge", edgecolor="black", linewidth=1, hatch='//', width=BAR_WIDTH, label='w/o bypass4netns(direct)') +plt.bar([x+BAR_WIDTH for x in range(0, len(labels))], value_wo_b4ns_host, align="edge", edgecolor="black", linewidth=1, hatch='//', width=BAR_WIDTH, label='w/o bypass4netns(via host)') +plt.bar([x+BAR_WIDTH*2 for x in range(0, len(labels))], value_w_b4ns, align="edge", edgecolor="black", linewidth=1, hatch='++', width=BAR_WIDTH, label='w/ bypass4netns(via host)') + +plt.legend() +plt.xlim(0, len(labels)+BAR_WIDTH*3-1) +plt.xticks([x+BAR_WIDTH*1.5 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[4]) diff --git a/test/init_test.sh b/test/init_test.sh index 576b174..8867ad3 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -20,7 +20,8 @@ echo "===== Prepare =====" sudo chown -R $TEST_USER:$TEST_USER ~/bypass4netns sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session 
iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd jq tcpdump ethtool + sudo DEBIAN_FRONTEND=noninteractive apt-get install -q -y build-essential curl dbus-user-session iperf3 libseccomp-dev uidmap python3 pkg-config iptables etcd jq tcpdump ethtool python3-pip + pip3 install matplotlib numpy sudo systemctl stop etcd sudo systemctl disable etcd From 456fd952842671b4d6a7657c635104ded16c34dd Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 09:42:43 +0000 Subject: [PATCH 32/55] bump golangci/golangci-lint-action Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a471076..560b000 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,9 +20,9 @@ jobs: go-version: 1.19.x - run: sudo apt-get update && sudo apt-get install -y libseccomp-dev - name: golangci-lint - uses: golangci/golangci-lint-action@v3.2.0 + uses: golangci/golangci-lint-action@v3.7.0 with: - version: v1.49.0 + version: v1.55.2 args: --verbose create-lxc-image: From 9fa097abb81bb9d80e6f54d3220142c157b9ea55 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 09:48:20 +0000 Subject: [PATCH 33/55] bump actions/checkout Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 560b000..446fd37 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-22.04 timeout-minutes: 20 steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 with: fetch-depth: 1 - uses: actions/setup-go@v3 @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 - uses: actions/cache/restore@v3 id: cache-restore @@ 
-71,7 +71,7 @@ jobs: needs: create-lxc-image timeout-minutes: 20 steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) run: ./setup_lxd.sh - uses: actions/cache/restore@v3 @@ -106,7 +106,7 @@ jobs: matrix: script: ["iperf3_host", "iperf3", "postgres", "redis"] steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) run: ./setup_lxd.sh - uses: actions/cache/restore@v3 @@ -141,7 +141,7 @@ jobs: matrix: script: ["iperf3_multinode.sh", "postgres_multinode.sh", "redis_multinode.sh"] steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) run: ./setup_lxd.sh - uses: actions/cache/restore@v3 From be2719352cb1f794e0cf7ffbd74edf05cd08b521 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 11:04:10 +0000 Subject: [PATCH 34/55] joint single/multinode benchmark results into one plot Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 43 ++++++++++++++++++++++++++++++++---- benchmark/redis.sh | 5 ----- benchmark/redis_multinode.sh | 6 ++--- benchmark/redis_plot.py | 38 +++++++++++-------------------- 4 files changed, 55 insertions(+), 37 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 446fd37..410acea 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -126,11 +126,16 @@ jobs: - name: upload plot id: get_plot if: matrix.script == 'redis' - run: sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.png /tmp/${{ matrix.script }}.png + run: | + mkdir /tmp/benchmark-results + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.csv /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-host.csv /tmp/benchmark-results/. 
+ sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-w-b4ns.csv /tmp/benchmark-results/. - uses: actions/upload-artifact@v3 if: steps.get_plot.conclusion == 'success' with: - path: /tmp/${{ matrix.script }}.png + name: benchmark-results + path: /tmp/benchmark-results benchmark-multinode: @@ -139,7 +144,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3_multinode.sh", "postgres_multinode.sh", "redis_multinode.sh"] + script: ["iperf3", "postgres", "redis"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -157,4 +162,34 @@ jobs: - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run benchmark (${{ matrix.script }}) - run: ./benchmark/${{ matrix.script }} \ No newline at end of file + run: ./benchmark/${{ matrix.script }}_multinode.sh + - name: upload plot + id: get_plot + if: matrix.script == 'redis' + run: | + mkdir /tmp/benchmark-results + sudo lxc file pull test2/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-multinode-wo-b4ns.csv /tmp/benchmark-results/. + sudo lxc file pull test2/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-multinode-w-b4ns.csv /tmp/benchmark-results/. 
+ - uses: actions/upload-artifact@v3 + if: steps.get_plot.conclusion == 'success' + with: + name: benchmark-results + path: /tmp/benchmark-results + + plot: + runs-on: ubuntu-22.04 + needs: [benchmark, benchmark-multinode] + steps: + - uses: actions/checkout@v4.1.1 + - run: sudo apt update && sudo apt install python3 python3-pip + - run: pip3 install matplotlib numpy + - uses: actions/download-artifact@v3 + with: + name: benchmark-results + path: ./ + - run: mkdir /tmp/benchmark-plots + - run: python3 benchmark/redis_plot.py python3 redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-multinode-wo-b4ns.csv redis-w-b4ns.csv redis-multinode-w-b4ns.csv /tmp/benchmark-plots/redis.png + - uses: actions/upload-artifact@v3 + with: + name: benchmark-plots + path: /tmp/benchmark-plots diff --git a/benchmark/redis.sh b/benchmark/redis.sh index 73c662a..caa6298 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -92,8 +92,3 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit systemctl --user stop etcd.service systemctl --user reset-failed ) - -echo "===== Visualize benchmark: redis =====" -( - python3 redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-w-b4ns.csv redis.png -) \ No newline at end of file diff --git a/benchmark/redis_multinode.sh b/benchmark/redis_multinode.sh index 45503c6..881aece 100755 --- a/benchmark/redis_multinode.sh +++ b/benchmark/redis_multinode.sh @@ -33,7 +33,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) w NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR 
$TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR - NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv + NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-wo-b4ns.csv NAME="test" exec_lxc nerdctl rm -f redis-server NAME="test2" exec_lxc nerdctl rm -f redis-client @@ -48,8 +48,8 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE" SERVER_IP=$(NAME="test" exec_lxc nerdctl exec redis-server hostname -i) NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" - NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" + NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" > redis-multinode-w-b4ns.csv NAME="test" exec_lxc nerdctl rm -f redis-server NAME="test2" exec_lxc nerdctl rm -f redis-client -) \ No newline at end of file +) diff --git a/benchmark/redis_plot.py b/benchmark/redis_plot.py index 3de9542..5b3b777 100644 --- a/benchmark/redis_plot.py +++ b/benchmark/redis_plot.py @@ -15,36 +15,24 @@ def load_data(filename): BAR_WIDTH=0.25 - -data_wo_b4ns_direct = load_data(sys.argv[1]) -data_wo_b4ns_host = load_data(sys.argv[2]) -data_w_b4ns = load_data(sys.argv[3]) - labels_for_data=['PING_INLINE', 'PING_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH (needed to benchmark LRANGE)', 'LRANGE_100 (first 100 elements)', 'LRANGE_300 (first 300 elements)', 'LRANGE_500 (first 500 elements)', 'LRANGE_600 (first 600 elements)', 'MSET (10 keys)', 'XADD'] - -value_wo_b4ns_direct = [] -value_wo_b4ns_host = [] -value_w_b4ns = [] -for l in labels_for_data: - 
value_wo_b4ns_direct.append(data_wo_b4ns_direct[l]) - value_wo_b4ns_host.append(data_wo_b4ns_host[l]) - value_w_b4ns.append(data_w_b4ns[l]) - labels=['PING\n_INLINE', 'PING\n_MBULK', 'SET', 'GET', 'INCR', 'LPUSH', 'RPUSH', 'LPOP', 'RPOP', 'SADD', 'HSET', 'SPOP', 'ZADD', 'ZPOPMIN', 'LPUSH', 'LRANGE\n_100', 'LRANGE\n_300', 'LRANGE\n_500', 'LRANGE\n_600', 'MSET\n(10 keys)', 'XADD'] -print(value_wo_b4ns_direct) -print(value_wo_b4ns_host) -print(value_w_b4ns) - plt.rcParams["figure.figsize"] = (20,4) - plt.ylabel("Request / seconds") -plt.bar([x for x in range(0, len(labels))], value_wo_b4ns_direct, align="edge", edgecolor="black", linewidth=1, hatch='//', width=BAR_WIDTH, label='w/o bypass4netns(direct)') -plt.bar([x+BAR_WIDTH for x in range(0, len(labels))], value_wo_b4ns_host, align="edge", edgecolor="black", linewidth=1, hatch='//', width=BAR_WIDTH, label='w/o bypass4netns(via host)') -plt.bar([x+BAR_WIDTH*2 for x in range(0, len(labels))], value_w_b4ns, align="edge", edgecolor="black", linewidth=1, hatch='++', width=BAR_WIDTH, label='w/ bypass4netns(via host)') + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data_csv = load_data(filename) + value = [] + for l in labels_for_data: + value.append(data_csv[l]) + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) plt.legend() -plt.xlim(0, len(labels)+BAR_WIDTH*3-1) -plt.xticks([x+BAR_WIDTH*1.5 for x in range(0, len(labels))], labels) +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) -plt.savefig(sys.argv[4]) +plt.savefig(sys.argv[1+data_num]) From 61a8add3cf8c9ff8a27dcd09a7c628761f6897ea Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 11:21:55 +0000 Subject: [PATCH 35/55] add iperf3 plot Signed-off-by: Naoki MATSUMOTO --- 
.github/workflows/test.yaml | 7 ++++--- benchmark/.gitignore | 3 ++- benchmark/iperf3.sh | 8 +++++--- benchmark/iperf3_multinode.sh | 4 ++-- benchmark/iperf3_plot.py | 30 ++++++++++++++++++++++++++++++ 5 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 benchmark/iperf3_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 410acea..fc3637a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -125,7 +125,7 @@ jobs: run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" - name: upload plot id: get_plot - if: matrix.script == 'redis' + if: matrix.script == 'redis' || matrix.script == 'iperf3' run: | mkdir /tmp/benchmark-results sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.csv /tmp/benchmark-results/. @@ -165,7 +165,7 @@ jobs: run: ./benchmark/${{ matrix.script }}_multinode.sh - name: upload plot id: get_plot - if: matrix.script == 'redis' + if: matrix.script == 'redis' || matrix.script == 'iperf3' run: | mkdir /tmp/benchmark-results sudo lxc file pull test2/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-multinode-wo-b4ns.csv /tmp/benchmark-results/. 
@@ -188,7 +188,8 @@ jobs: name: benchmark-results path: ./ - run: mkdir /tmp/benchmark-plots - - run: python3 benchmark/redis_plot.py python3 redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-multinode-wo-b4ns.csv redis-w-b4ns.csv redis-multinode-w-b4ns.csv /tmp/benchmark-plots/redis.png + - run: python3 benchmark/redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-multinode-wo-b4ns.csv redis-w-b4ns.csv redis-multinode-w-b4ns.csv /tmp/benchmark-plots/redis.png + - run: python3 benchmark/iperf3_plot.py iperf3-wo-b4ns-direct.json iperf3-wo-b4ns-host.json iperf3-multinode-wo-b4ns.json iperf3-w-b4ns.json iperf3-multinode-w-b4ns.json /tmp/benchmark-plots/iperf3.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/.gitignore b/benchmark/.gitignore index 8ec9dfd..6980aa6 100644 --- a/benchmark/.gitignore +++ b/benchmark/.gitignore @@ -1,2 +1,3 @@ *.csv -*.png \ No newline at end of file +*.png +*.json \ No newline at end of file diff --git a/benchmark/iperf3.sh b/benchmark/iperf3.sh index cd203f5..ed6a248 100755 --- a/benchmark/iperf3.sh +++ b/benchmark/iperf3.sh @@ -2,6 +2,8 @@ set -eu -o pipefail +cd $(dirname $0) + ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" source ~/.profile @@ -26,7 +28,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) SERVER_IP=$(nerdctl exec iperf3-server hostname -i) sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-direct.json nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client @@ -52,7 +54,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 
-J > iperf3-wo-b4ns-host.json nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client @@ -81,7 +83,7 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) vi SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.json nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client diff --git a/benchmark/iperf3_multinode.sh b/benchmark/iperf3_multinode.sh index 786451d..c55db29 100755 --- a/benchmark/iperf3_multinode.sh +++ b/benchmark/iperf3_multinode.sh @@ -36,7 +36,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-wo-b4ns.json NAME="test" exec_lxc nerdctl rm -f iperf3-server NAME="test" exec_lxc systemctl --user reset-failed @@ -55,7 +55,7 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) wi SERVER_IP=$(NAME="test" exec_lxc nerdctl exec iperf3-server hostname -i) NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-multinode-w-b4ns.json NAME="test" exec_lxc nerdctl rm -f iperf3-server NAME="test2" exec_lxc nerdctl rm -f iperf3-client diff --git a/benchmark/iperf3_plot.py b/benchmark/iperf3_plot.py new file mode 100644 index 0000000..744bdd4 
--- /dev/null +++ b/benchmark/iperf3_plot.py @@ -0,0 +1,30 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.4 + +def load_data(filename): + with open(filename) as f: + return json.load(f) + +labels=['sum_received.bits_per_second'] + +#plt.rcParams["figure.figsize"] = (20,4) +plt.ylabel("Gbps") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data_json = load_data(filename) + value = [data_json["end"]["sum_received"]["bits_per_second"] / 1024 / 1024 / 1024] + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +plt.legend() +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From f50cc1a58a516112bf0d229a4c1b77a6c9891419 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 28 Nov 2023 11:31:31 +0000 Subject: [PATCH 36/55] use common extension Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 14 +++++++------- benchmark/iperf3.sh | 6 +++--- benchmark/iperf3_multinode.sh | 4 ++-- benchmark/redis.sh | 12 ++++++------ benchmark/redis_multinode.sh | 4 ++-- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fc3637a..5ff89d2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -128,9 +128,9 @@ jobs: if: matrix.script == 'redis' || matrix.script == 'iperf3' run: | mkdir /tmp/benchmark-results - sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.csv /tmp/benchmark-results/. - sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-host.csv /tmp/benchmark-results/. 
- sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-w-b4ns.csv /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-host.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-w-b4ns.log /tmp/benchmark-results/. - uses: actions/upload-artifact@v3 if: steps.get_plot.conclusion == 'success' with: @@ -168,8 +168,8 @@ jobs: if: matrix.script == 'redis' || matrix.script == 'iperf3' run: | mkdir /tmp/benchmark-results - sudo lxc file pull test2/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-multinode-wo-b4ns.csv /tmp/benchmark-results/. - sudo lxc file pull test2/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-multinode-w-b4ns.csv /tmp/benchmark-results/. + cp benchmark/${{ matrix.script }}-multinode-wo-b4ns.log /tmp/benchmark-results/. + cp benchmark/${{ matrix.script }}-multinode-w-b4ns.log /tmp/benchmark-results/. 
- uses: actions/upload-artifact@v3 if: steps.get_plot.conclusion == 'success' with: @@ -188,8 +188,8 @@ jobs: name: benchmark-results path: ./ - run: mkdir /tmp/benchmark-plots - - run: python3 benchmark/redis_plot.py redis-wo-b4ns-direct.csv redis-wo-b4ns-host.csv redis-multinode-wo-b4ns.csv redis-w-b4ns.csv redis-multinode-w-b4ns.csv /tmp/benchmark-plots/redis.png - - run: python3 benchmark/iperf3_plot.py iperf3-wo-b4ns-direct.json iperf3-wo-b4ns-host.json iperf3-multinode-wo-b4ns.json iperf3-w-b4ns.json iperf3-multinode-w-b4ns.json /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png + - run: python3 benchmark/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/iperf3.sh b/benchmark/iperf3.sh index ed6a248..c8dac8a 100755 --- a/benchmark/iperf3.sh +++ b/benchmark/iperf3.sh @@ -28,7 +28,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) SERVER_IP=$(nerdctl exec iperf3-server hostname -i) sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-direct.json + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-direct.log nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client @@ -54,7 +54,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-host.json + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-host.log nerdctl rm -f 
iperf3-server nerdctl rm -f iperf3-client @@ -83,7 +83,7 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) vi SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.json + nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.log nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client diff --git a/benchmark/iperf3_multinode.sh b/benchmark/iperf3_multinode.sh index c55db29..e47c558 100755 --- a/benchmark/iperf3_multinode.sh +++ b/benchmark/iperf3_multinode.sh @@ -36,7 +36,7 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) NAME="test2" exec_lxc nerdctl exec iperf3-client apk add --no-cache iperf3 NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-wo-b4ns.json + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-wo-b4ns.log NAME="test" exec_lxc nerdctl rm -f iperf3-server NAME="test" exec_lxc systemctl --user reset-failed @@ -55,7 +55,7 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) wi SERVER_IP=$(NAME="test" exec_lxc nerdctl exec iperf3-server hostname -i) NAME="test" exec_lxc systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-multinode-w-b4ns.json + NAME="test2" exec_lxc nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-multinode-w-b4ns.log NAME="test" exec_lxc nerdctl rm -f iperf3-server NAME="test2" exec_lxc nerdctl rm -f iperf3-client diff --git a/benchmark/redis.sh b/benchmark/redis.sh index 
caa6298..8d48f6d 100755 --- a/benchmark/redis.sh +++ b/benchmark/redis.sh @@ -20,8 +20,8 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(nerdctl exec redis-server hostname -i) - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-wo-b4ns-direct.csv - cat redis-wo-b4ns-direct.csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-wo-b4ns-direct.log + cat redis-wo-b4ns-direct.log nerdctl rm -f redis-server nerdctl rm -f redis-client @@ -37,8 +37,8 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-wo-b4ns-host.csv - cat redis-wo-b4ns-host.csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-wo-b4ns-host.log + cat redis-wo-b4ns-host.log nerdctl rm -f redis-server nerdctl rm -f redis-client @@ -58,8 +58,8 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-w-b4ns.csv - cat redis-w-b4ns.csv + nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-w-b4ns.log + cat redis-w-b4ns.log nerdctl rm -f redis-server nerdctl rm -f redis-client diff --git a/benchmark/redis_multinode.sh b/benchmark/redis_multinode.sh index 881aece..6465dd8 100755 --- a/benchmark/redis_multinode.sh +++ 
b/benchmark/redis_multinode.sh @@ -33,7 +33,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) w NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR - NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-wo-b4ns.csv + NAME="test2" exec_lxc nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-wo-b4ns.log NAME="test" exec_lxc nerdctl rm -f redis-server NAME="test2" exec_lxc nerdctl rm -f redis-client @@ -48,7 +48,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE" SERVER_IP=$(NAME="test" exec_lxc nerdctl exec redis-server hostname -i) NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity" - NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" > redis-multinode-w-b4ns.csv + NAME="test2" exec_lxc nerdctl exec redis-client /bin/sh -c "sleep 1 && redis-benchmark -q -h $SERVER_IP --csv" > redis-multinode-w-b4ns.log NAME="test" exec_lxc nerdctl rm -f redis-server NAME="test2" exec_lxc nerdctl rm -f redis-client From 26c2ac65350ee84a04f239886f7d8b2a4925275c Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 29 Nov 2023 13:58:19 +0000 Subject: [PATCH 37/55] add postgres plot Signed-off-by: Naoki MATSUMOTO --- 
.github/workflows/test.yaml | 8 ++--- benchmark/.gitignore | 3 +- benchmark/postgres.sh | 6 ++-- benchmark/postgres_multinode.sh | 4 +-- benchmark/postgres_plot.py | 56 +++++++++++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 benchmark/postgres_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5ff89d2..d6b436d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -7,6 +7,8 @@ on: - ng-b4ns - release/** pull_request: null + workflow_dispatch: + jobs: golangci-lint: runs-on: ubuntu-22.04 @@ -125,7 +127,7 @@ jobs: run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" - name: upload plot id: get_plot - if: matrix.script == 'redis' || matrix.script == 'iperf3' + if: matrix.script != 'iperf3_host' run: | mkdir /tmp/benchmark-results sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.log /tmp/benchmark-results/. @@ -164,14 +166,11 @@ jobs: - name: run benchmark (${{ matrix.script }}) run: ./benchmark/${{ matrix.script }}_multinode.sh - name: upload plot - id: get_plot - if: matrix.script == 'redis' || matrix.script == 'iperf3' run: | mkdir /tmp/benchmark-results cp benchmark/${{ matrix.script }}-multinode-wo-b4ns.log /tmp/benchmark-results/. cp benchmark/${{ matrix.script }}-multinode-w-b4ns.log /tmp/benchmark-results/. 
- uses: actions/upload-artifact@v3 - if: steps.get_plot.conclusion == 'success' with: name: benchmark-results path: /tmp/benchmark-results @@ -190,6 +189,7 @@ jobs: - run: mkdir /tmp/benchmark-plots - run: python3 benchmark/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png - run: python3 benchmark/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/postgres_plot.py postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/.gitignore b/benchmark/.gitignore index 6980aa6..676904e 100644 --- a/benchmark/.gitignore +++ b/benchmark/.gitignore @@ -1,3 +1,4 @@ *.csv *.png -*.json \ No newline at end of file +*.json +*.log \ No newline at end of file diff --git a/benchmark/postgres.sh b/benchmark/postgres.sh index 11cab6f..2e1e6bb 100755 --- a/benchmark/postgres.sh +++ b/benchmark/postgres.sh @@ -24,7 +24,7 @@ echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4net PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 5432; do sleep 1; done" nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres - nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-direct.log nerdctl rm -f psql-server nerdctl rm -f psql-client @@ -42,7 +42,7 @@ echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4net SERVER_IP=$(hostname -I | awk '{print $1}') sleep 5 nerdctl exec 
psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-host.log nerdctl rm -f psql-server nerdctl rm -f psql-client @@ -65,7 +65,7 @@ echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ bypass4netns PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 15432; do sleep 1; done" nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres + nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-w-b4ns.log nerdctl rm -f psql-server nerdctl rm -f psql-client diff --git a/benchmark/postgres_multinode.sh b/benchmark/postgres_multinode.sh index 53b5be6..466ead2 100755 --- a/benchmark/postgres_multinode.sh +++ b/benchmark/postgres_multinode.sh @@ -35,7 +35,7 @@ echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4net NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR sleep 5 NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -i postgres - NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres > postgres-multinode-wo-b4ns.log NAME="test" exec_lxc nerdctl rm -f psql-server NAME="test2" exec_lxc nerdctl rm -f psql-client @@ -51,7 +51,7 @@ echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ bypass4netns SERVER_IP=$(NAME="test" exec_lxc nerdctl exec 
psql-server hostname -i) sleep 5 NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres - NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres + NAME="test2" exec_lxc nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-multinode-w-b4ns.log NAME="test" exec_lxc nerdctl rm -f psql-server NAME="test2" exec_lxc nerdctl rm -f psql-client diff --git a/benchmark/postgres_plot.py b/benchmark/postgres_plot.py new file mode 100644 index 0000000..3e43a7e --- /dev/null +++ b/benchmark/postgres_plot.py @@ -0,0 +1,56 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip() + if "latency average" in line: + data["latency (ms)"] = float(line.split(" ")[3]) + if "tps" in line: + data["tps"] = float(line.split(" ")[2]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['tps', 'latency (ms)'] + +plt.ylabel("Request / seconds") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +print(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("transaction / second") +ax2 = ax1.twinx() +ax2.set_ylabel("latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i], datas[i][labels[0]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor+BAR_WIDTH*i], datas[i][labels[1]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) 
+plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From 68888af47afd2cec917c98e2388b81bb76df8a53 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 30 Nov 2023 00:55:43 +0000 Subject: [PATCH 38/55] move benchmarks to directories Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 12 ++++++------ benchmark/{ => iperf3}/iperf3.sh | 0 benchmark/{ => iperf3}/iperf3_host.sh | 0 benchmark/{ => iperf3}/iperf3_multinode.sh | 2 +- benchmark/{ => iperf3}/iperf3_plot.py | 0 benchmark/{ => postgres}/postgres.sh | 2 +- benchmark/{ => postgres}/postgres_multinode.sh | 2 +- benchmark/{ => postgres}/postgres_plot.py | 0 benchmark/{ => redis}/redis.sh | 0 benchmark/{ => redis}/redis_multinode.sh | 2 +- benchmark/{ => redis}/redis_plot.py | 0 11 files changed, 10 insertions(+), 10 deletions(-) rename benchmark/{ => iperf3}/iperf3.sh (100%) rename benchmark/{ => iperf3}/iperf3_host.sh (100%) rename benchmark/{ => iperf3}/iperf3_multinode.sh (99%) rename benchmark/{ => iperf3}/iperf3_plot.py (100%) rename benchmark/{ => postgres}/postgres.sh (99%) rename benchmark/{ => postgres}/postgres_multinode.sh (99%) rename benchmark/{ => postgres}/postgres_plot.py (100%) rename benchmark/{ => redis}/redis.sh (100%) rename benchmark/{ => redis}/redis_multinode.sh (99%) rename benchmark/{ => redis}/redis_plot.py (100%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d6b436d..71d1103 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -106,7 +106,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3_host", "iperf3", "postgres", "redis"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -127,7 +127,7 @@ jobs: run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script 
}}.sh" - name: upload plot id: get_plot - if: matrix.script != 'iperf3_host' + if: matrix.script != 'iperf3/iperf3_host' run: | mkdir /tmp/benchmark-results sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.log /tmp/benchmark-results/. @@ -146,7 +146,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3", "postgres", "redis"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -187,9 +187,9 @@ jobs: name: benchmark-results path: ./ - run: mkdir /tmp/benchmark-plots - - run: python3 benchmark/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png - - run: python3 benchmark/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png - - run: python3 benchmark/postgres_plot.py postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png + - run: python3 benchmark/redis/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/iperf3.sh b/benchmark/iperf3/iperf3.sh similarity index 100% rename from 
benchmark/iperf3.sh rename to benchmark/iperf3/iperf3.sh diff --git a/benchmark/iperf3_host.sh b/benchmark/iperf3/iperf3_host.sh similarity index 100% rename from benchmark/iperf3_host.sh rename to benchmark/iperf3/iperf3_host.sh diff --git a/benchmark/iperf3_multinode.sh b/benchmark/iperf3/iperf3_multinode.sh similarity index 99% rename from benchmark/iperf3_multinode.sh rename to benchmark/iperf3/iperf3_multinode.sh index e47c558..ac32784 100755 --- a/benchmark/iperf3_multinode.sh +++ b/benchmark/iperf3/iperf3_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../util.sh +. ../../util.sh set +e NAME="test" exec_lxc nerdctl rm -f iperf3-server diff --git a/benchmark/iperf3_plot.py b/benchmark/iperf3/iperf3_plot.py similarity index 100% rename from benchmark/iperf3_plot.py rename to benchmark/iperf3/iperf3_plot.py diff --git a/benchmark/postgres.sh b/benchmark/postgres/postgres.sh similarity index 99% rename from benchmark/postgres.sh rename to benchmark/postgres/postgres.sh index 2e1e6bb..a9ff8a3 100755 --- a/benchmark/postgres.sh +++ b/benchmark/postgres/postgres.sh @@ -7,7 +7,7 @@ POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" source ~/.profile cd $(dirname $0) -. ../util.sh +. ../../util.sh nerdctl pull --quiet $POSTGRES_IMAGE diff --git a/benchmark/postgres_multinode.sh b/benchmark/postgres/postgres_multinode.sh similarity index 99% rename from benchmark/postgres_multinode.sh rename to benchmark/postgres/postgres_multinode.sh index 466ead2..b0abdd8 100755 --- a/benchmark/postgres_multinode.sh +++ b/benchmark/postgres/postgres_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../util.sh +. 
../../util.sh set +e NAME="test" exec_lxc nerdctl rm -f psql-server diff --git a/benchmark/postgres_plot.py b/benchmark/postgres/postgres_plot.py similarity index 100% rename from benchmark/postgres_plot.py rename to benchmark/postgres/postgres_plot.py diff --git a/benchmark/redis.sh b/benchmark/redis/redis.sh similarity index 100% rename from benchmark/redis.sh rename to benchmark/redis/redis.sh diff --git a/benchmark/redis_multinode.sh b/benchmark/redis/redis_multinode.sh similarity index 99% rename from benchmark/redis_multinode.sh rename to benchmark/redis/redis_multinode.sh index 6465dd8..1c71aa1 100755 --- a/benchmark/redis_multinode.sh +++ b/benchmark/redis/redis_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../util.sh +. ../../util.sh set +e NAME="test" exec_lxc nerdctl rm -f redis-server diff --git a/benchmark/redis_plot.py b/benchmark/redis/redis_plot.py similarity index 100% rename from benchmark/redis_plot.py rename to benchmark/redis/redis_plot.py From c2ef9e3cf7a873efe9bdf0bfdb5f958d07be6e39 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 30 Nov 2023 06:37:51 +0000 Subject: [PATCH 39/55] handle threads pid can be thread's pid and open_pidfd() fails with the pid. When open_pidfd() fails, retry with the pid's tgid and replace the pid to the tgid. Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 171 +++++++++++++++++++++++++------ pkg/bypass4netns/socket.go | 14 +-- 2 files changed, 147 insertions(+), 38 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index 2914544..f5181cb 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -13,6 +13,7 @@ import ( "os" "os/exec" "strconv" + "strings" "syscall" "time" @@ -80,7 +81,7 @@ func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) { } // readProcMem read data from memory of specified pid process at the spcified offset. 
-func (h *notifHandler) readProcMem(pid uint32, offset uint64, len uint64) ([]byte, error) { +func (h *notifHandler) readProcMem(pid int, offset uint64, len uint64) ([]byte, error) { buffer := make([]byte, len) // PATH_MAX memfd, err := h.openMem(pid) @@ -97,7 +98,7 @@ func (h *notifHandler) readProcMem(pid uint32, offset uint64, len uint64) ([]byt } // writeProcMem writes data to memory of specified pid process at the specified offset. -func (h *notifHandler) writeProcMem(pid uint32, offset uint64, buf []byte) error { +func (h *notifHandler) writeProcMem(pid int, offset uint64, buf []byte) error { memfd, err := h.openMem(pid) if err != nil { return err @@ -114,7 +115,7 @@ func (h *notifHandler) writeProcMem(pid uint32, offset uint64, buf []byte) error return nil } -func (h *notifHandler) openMem(pid uint32) (int, error) { +func (h *notifHandler) openMem(pid int) (int, error) { if memfd, ok := h.memfds[pid]; ok { return memfd, nil } @@ -133,7 +134,7 @@ func (h *notifHandler) openMem(pid uint32) (int, error) { return memfd, nil } -func openMemWithNSEnter(pid uint32) (int, error) { +func openMemWithNSEnter(pid int) (int, error) { fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) if err != nil { return 0, err @@ -278,15 +279,74 @@ type context struct { resp *libseccomp.ScmpNotifResp } -// getFdInProcess get the file descriptor in other process -func getFdInProcess(pid, targetFd int) (int, error) { +func (h *notifHandler) getPidFdInfo(pid int) (*pidInfo, error) { + // retrieve pidfd from cache + if pidfd, ok := h.pidInfos[pid]; ok { + return &pidfd, nil + } + targetPidfd, err := pidfd.Open(int(pid), 0) + if err == nil { + info := pidInfo{ + pidType: PROCESS, + pidfd: targetPidfd, + tgid: pid, // process's pid is equal to its tgid + } + h.pidInfos[pid] = info + return &info, nil + } + + // pid can be thread and pidfd_open fails with thread's pid. + // retrieve process's pid (tgid) from /proc//status and retry to get pidfd with the tgid. 
+ logrus.Warnf("pidfd Open failed: pid=%d err=%q, this pid maybe thread and retrying with tgid", pid, err) + st, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid)) + if err != nil { + return nil, fmt.Errorf("failed to read %d's status err=%q", pid, err) + } + + nextTgid := -1 + for _, s := range strings.Split(string(st), "\n") { + if strings.Contains(s, "Tgid") { + tgids := strings.Split(s, "\t") + if len(tgids) < 2 { + return nil, fmt.Errorf("unexpected /proc/%d/status len=%q status=%q", pid, len(tgids), string(st)) + } + tgid, err := strconv.Atoi(tgids[1]) + if err != nil { + return nil, fmt.Errorf("unexpected /proc/%d/status err=%q status=%q", pid, err, string(st)) + } + nextTgid = tgid + } + if nextTgid > 0 { + break + } + } + if nextTgid < 0 { + logrus.Errorf("cannot get Tgid from /proc/%d/status status=%q", pid, string(st)) + } + targetPidfd, err = pidfd.Open(nextTgid, 0) + if err != nil { + return nil, fmt.Errorf("pidfd Open failed with Tgid: pid=%d %s", nextTgid, err) + } + + logrus.Infof("successfully got pidfd for pid=%d tgid=%d", pid, nextTgid) + info := pidInfo{ + pidType: THREAD, + pidfd: targetPidfd, + tgid: nextTgid, + } + h.pidInfos[pid] = info + return &info, nil +} + +// getFdInProcess get the file descriptor in other process +func (h *notifHandler) getFdInProcess(pid, targetFd int) (int, error) { + targetPidfd, err := h.getPidFdInfo(pid) if err != nil { return 0, fmt.Errorf("pidfd Open failed: %s", err) } - defer syscall.Close(int(targetPidfd)) - fd, err := targetPidfd.GetFd(targetFd, 0) + fd, err := targetPidfd.pidfd.GetFd(targetFd, 0) if err != nil { return 0, fmt.Errorf("pidfd GetFd failed: %s", err) } @@ -316,7 +376,7 @@ func getSocketArgs(sockfd int) (int, int, int, error) { return sock_domain, sock_type, sock_protocol, nil } -func (h *notifHandler) readSockaddrFromProcess(pid uint32, offset uint64, addrlen uint64) (*sockaddr, error) { +func (h *notifHandler) readSockaddrFromProcess(pid int, offset uint64, addrlen uint64) (*sockaddr, 
error) { buf, err := h.readProcMem(pid, offset, addrlen) if err != nil { return nil, fmt.Errorf("failed readProcMem pid %v offset 0x%x: %s", pid, offset, err) @@ -324,8 +384,8 @@ func (h *notifHandler) readSockaddrFromProcess(pid uint32, offset uint64, addrle return newSockaddr(buf) } -func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, error) { - logger := logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}) +func (h *notifHandler) registerSocket(pid int, sockfd int, syscallName string) (*socketStatus, error) { + logger := logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd, "syscall": syscallName}) proc, ok := h.processes[pid] if !ok { proc = newProcessStatus() @@ -339,7 +399,13 @@ func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, er return sock, nil } - sockFdHost, err := getFdInProcess(int(pid), sockfd) + // If the pid is thread, its process can have corresponding socket + procInfo, ok := h.pidInfos[int(pid)] + if ok && procInfo.pidType == THREAD { + return nil, fmt.Errorf("unexpected procInfo") + } + + sockFdHost, err := h.getFdInProcess(int(pid), sockfd) if err != nil { return nil, err } @@ -350,13 +416,16 @@ func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, er if err != nil { // non-socket fd is not bypassable sock.state = NotBypassable + logger.Debugf("failed to get socket args err=%q", err) } else { if sockDomain != syscall.AF_INET && sockDomain != syscall.AF_INET6 { // non IP sockets are not handled. sock.state = NotBypassable + logger.Debugf("socket domain=0x%x", sockDomain) } else if sockType != syscall.SOCK_STREAM { // only accepting TCP socket sock.state = NotBypassable + logger.Debugf("socket type=0x%x", sockType) } else { // only newly created socket is allowed. 
_, err := syscall.Getpeername(sockFdHost) @@ -377,7 +446,7 @@ func (h *notifHandler) registerSocket(pid uint32, sockfd int) (*socketStatus, er return sock, nil } -func (h *notifHandler) getSocket(pid uint32, sockfd int) *socketStatus { +func (h *notifHandler) getSocket(pid int, sockfd int) *socketStatus { proc, ok := h.processes[pid] if !ok { return nil @@ -386,7 +455,7 @@ func (h *notifHandler) getSocket(pid uint32, sockfd int) *socketStatus { return sock } -func (h *notifHandler) removeSocket(pid uint32, sockfd int) { +func (h *notifHandler) removeSocket(pid int, sockfd int) { defer logrus.WithFields(logrus.Fields{"pid": pid, "sockfd": sockfd}).Debugf("socket is removed") proc, ok := h.processes[pid] if !ok { @@ -407,36 +476,59 @@ func (h *notifHandler) handleReq(ctx *context) { ctx.resp.Flags |= SeccompUserNotifFlagContinue + // ensure pid is registered in notifHandler.pidInfos + pidInfo, err := h.getPidFdInfo(int(ctx.req.Pid)) + if err != nil { + logrus.Errorf("failed to get pidfd err=%q", err) + return + } + + // threads shares file descriptors in the same process space. + // so use tgid as pid to process socket file descriptors + pid := pidInfo.tgid + if pidInfo.pidType == THREAD { + logrus.Debugf("pid %d is thread. use process's tgid %d as pid", ctx.req.Pid, pid) + } + // cleanup sockets when the process exit. 
if syscallName == "_exit" || syscallName == "exit_group" { - delete(h.processes, ctx.req.Pid) - if memfd, ok := h.memfds[ctx.req.Pid]; ok { - syscall.Close(memfd) - delete(h.memfds, ctx.req.Pid) + if pidInfo, ok := h.pidInfos[int(ctx.req.Pid)]; ok { + syscall.Close(int(pidInfo.pidfd)) + delete(h.pidInfos, int(ctx.req.Pid)) + } + if pidInfo.pidType == THREAD { + logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid, "tgid": pid}).Infof("thread is removed") + } + + if pidInfo.pidType == PROCESS { + delete(h.processes, pid) + if memfd, ok := h.memfds[pid]; ok { + syscall.Close(memfd) + delete(h.memfds, pid) + } + logrus.WithFields(logrus.Fields{"pid": pid}).Infof("process is removed") } - logrus.WithFields(logrus.Fields{"pid": ctx.req.Pid}).Debugf("process is removed") return } + sockfd := int(ctx.req.Data.Args[0]) // remove socket when closed if syscallName == "close" { - h.removeSocket(ctx.req.Pid, int(ctx.req.Data.Args[0])) + h.removeSocket(pid, sockfd) return } - pid := ctx.req.Pid - sockfd := int(ctx.req.Data.Args[0]) sock := h.getSocket(pid, sockfd) if sock == nil { - sock, err = h.registerSocket(pid, sockfd) + sock, err = h.registerSocket(pid, sockfd, syscallName) if err != nil { logrus.Errorf("failed to register socket pid %d sockfd %d: %s", pid, sockfd, err) return } } - if syscallName == "getpeername" { - sock.handleSysGetpeername(h, ctx) + if syscallName == "connect" { + logrus.WithFields(logrus.Fields{"notifFd": h.fd, "pid": pid, "sockfd": sockfd}).Infof("connect") } switch sock.state { @@ -448,11 +540,11 @@ func (h *notifHandler) handleReq(ctx *context) { switch syscallName { case "bind": - sock.handleSysBind(h, ctx) + sock.handleSysBind(pid, h, ctx) case "connect": sock.handleSysConnect(h, ctx) case "setsockopt": - sock.handleSysSetsockopt(h, ctx) + sock.handleSysSetsockopt(pid, h, ctx) case "fcntl": sock.handleSysFcntl(ctx) case "getpeername": @@ -595,7 +687,7 @@ type notifHandler struct { forwardingPorts map[int]ForwardPortMapping // key is pid - 
processes map[uint32]*processStatus + processes map[int]*processStatus // key is destination address e.g. "192.168.1.1:1000" containerInterfaces map[string]containerInterface @@ -603,7 +695,10 @@ type notifHandler struct { multinode *MultinodeConfig // cache /proc//mem's fd to reduce latency. key is pid, value is fd - memfds map[uint32]int + memfds map[int]int + + // cache pidfd to reduce latency. key is pid. + pidInfos map[int]pidInfo } type containerInterface struct { @@ -612,13 +707,27 @@ type containerInterface struct { lastCheckedUnix int64 } +type pidInfoPidType int + +const ( + PROCESS pidInfoPidType = iota + THREAD +) + +type pidInfo struct { + pidType pidInfoPidType + pidfd pidfd.PidFd + tgid int +} + func (h *Handler) newNotifHandler(fd uintptr, state *specs.ContainerProcessState) *notifHandler { notifHandler := notifHandler{ fd: libseccomp.ScmpFd(fd), state: state, forwardingPorts: map[int]ForwardPortMapping{}, - processes: map[uint32]*processStatus{}, - memfds: map[uint32]int{}, + processes: map[int]*processStatus{}, + memfds: map[int]int{}, + pidInfos: map[int]pidInfo{}, } notifHandler.nonBypassable = nonbypassable.New(h.ignoredSubnets) notifHandler.nonBypassableAutoUpdate = h.ignoredSubnetsAutoUpdate diff --git a/pkg/bypass4netns/socket.go b/pkg/bypass4netns/socket.go index 1789340..a9cdc55 100644 --- a/pkg/bypass4netns/socket.go +++ b/pkg/bypass4netns/socket.go @@ -70,7 +70,7 @@ func newProcessStatus() *processStatus { type socketStatus struct { state socketState - pid uint32 + pid int sockfd int sockDomain int sockType int @@ -83,7 +83,7 @@ type socketStatus struct { logger *logrus.Entry } -func newSocketStatus(pid uint32, sockfd int, sockDomain, sockType, sockProto int) *socketStatus { +func newSocketStatus(pid int, sockfd int, sockDomain, sockType, sockProto int) *socketStatus { return &socketStatus{ state: NotBypassed, pid: pid, @@ -97,14 +97,14 @@ func newSocketStatus(pid uint32, sockfd int, sockDomain, sockType, sockProto int } } -func (ss 
*socketStatus) handleSysSetsockopt(handler *notifHandler, ctx *context) { +func (ss *socketStatus) handleSysSetsockopt(pid int, handler *notifHandler, ctx *context) { ss.logger.Debug("handle setsockopt") level := ctx.req.Data.Args[1] optname := ctx.req.Data.Args[2] optlen := ctx.req.Data.Args[4] - optval, err := handler.readProcMem(ctx.req.Pid, ctx.req.Data.Args[3], optlen) + optval, err := handler.readProcMem(pid, ctx.req.Data.Args[3], optlen) if err != nil { - ss.logger.Errorf("setsockopt readProcMem failed pid %v offset 0x%x: %s", ctx.req.Pid, ctx.req.Data.Args[1], err) + ss.logger.Errorf("setsockopt readProcMem failed pid %v offset 0x%x: %s", pid, ctx.req.Data.Args[1], err) } value := socketOption{ @@ -292,8 +292,8 @@ func (ss *socketStatus) handleSysConnect(handler *notifHandler, ctx *context) { ss.logger.Infof("bypassed connect socket destAddr=%s", ss.addr) } -func (ss *socketStatus) handleSysBind(handler *notifHandler, ctx *context) { - sa, err := handler.readSockaddrFromProcess(ctx.req.Pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) +func (ss *socketStatus) handleSysBind(pid int, handler *notifHandler, ctx *context) { + sa, err := handler.readSockaddrFromProcess(pid, ctx.req.Data.Args[1], ctx.req.Data.Args[2]) if err != nil { ss.logger.Errorf("failed to read sockaddr from process: %q", err) ss.state = NotBypassable From 28c59455451e4f869f48d473ef6e9613de6a1f6e Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 30 Nov 2023 11:36:08 +0000 Subject: [PATCH 40/55] try to re-register NotBypassable socket when connect(2) called. Some binaries (e.g. golang) close fd then create socket with same fd immediately. Seccomp notify sometimes drops first close and b4ns cannot bypass the socket. This is workaround for such inconsistent condition. 
Signed-off-by: Naoki MATSUMOTO --- pkg/bypass4netns/bypass4netns.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/bypass4netns/bypass4netns.go b/pkg/bypass4netns/bypass4netns.go index f5181cb..9ea628e 100644 --- a/pkg/bypass4netns/bypass4netns.go +++ b/pkg/bypass4netns/bypass4netns.go @@ -527,15 +527,29 @@ func (h *notifHandler) handleReq(ctx *context) { } } - if syscallName == "connect" { - logrus.WithFields(logrus.Fields{"notifFd": h.fd, "pid": pid, "sockfd": sockfd}).Infof("connect") - } - switch sock.state { - case NotBypassable, Bypassed: + case NotBypassable: + // sometimes close(2) is not called for the fd. + // To handle such condition, re-register fd when connect is called for not bypassable fd. + if syscallName == "connect" { + h.removeSocket(pid, sockfd) + sock, err = h.registerSocket(pid, sockfd, syscallName) + if err != nil { + logrus.Errorf("failed to re-register socket pid %d sockfd %d: %s", pid, sockfd, err) + return + } + } + if sock.state != NotBypassed { + return + } + + // when sock.state == NotBypassed, continue + case Bypassed: + if syscallName == "getpeername" { + sock.handleSysGetpeername(h, ctx) + } return default: - // continue } switch syscallName { From 70d73e3e98f3a677b442e4c126d12b47e70928ef Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 30 Nov 2023 11:52:16 +0000 Subject: [PATCH 41/55] add block benchmark (nginx, static linked go-based http client) Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 5 +- benchmark/block/.gitignore | 2 + benchmark/block/Dockerfile | 14 ++++ benchmark/block/bench.go | 102 ++++++++++++++++++++++++ benchmark/block/block.sh | 123 +++++++++++++++++++++++++++++ benchmark/block/block_multinode.sh | 73 +++++++++++++++++ benchmark/block/block_plot.py | 41 ++++++++++ benchmark/block/gen_blocks.sh | 18 +++++ 8 files changed, 376 insertions(+), 2 deletions(-) create mode 100644 benchmark/block/.gitignore create mode 100644 
benchmark/block/Dockerfile create mode 100644 benchmark/block/bench.go create mode 100755 benchmark/block/block.sh create mode 100755 benchmark/block/block_multinode.sh create mode 100644 benchmark/block/block_plot.py create mode 100755 benchmark/block/gen_blocks.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 71d1103..7c13e30 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -106,7 +106,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -146,7 +146,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3", "postgres/postgres", "redis/redis"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -190,6 +190,7 @@ jobs: - run: python3 benchmark/redis/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png - run: python3 benchmark/postgres/postgres_plot.py postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png + - run: python3 benchmark/block/block_plot.py block-wo-b4ns-direct.log block-wo-b4ns-host.log block-multinode-wo-b4ns.log block-w-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/block/.gitignore b/benchmark/block/.gitignore 
new file mode 100644 index 0000000..1d04dbb --- /dev/null +++ b/benchmark/block/.gitignore @@ -0,0 +1,2 @@ +blk-* +bench \ No newline at end of file diff --git a/benchmark/block/Dockerfile b/benchmark/block/Dockerfile new file mode 100644 index 0000000..728e4fa --- /dev/null +++ b/benchmark/block/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.21.3 as bench-builder + +COPY bench.go . +# static link +RUN CGO_ENABLED=0 go build -o /bench bench.go + +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y wget multitime nginx +COPY --from=bench-builder /bench /bench + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/benchmark/block/bench.go b/benchmark/block/bench.go new file mode 100644 index 0000000..15bbb1f --- /dev/null +++ b/benchmark/block/bench.go @@ -0,0 +1,102 @@ +package main + +import ( + "bytes" + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +var ( + url = flag.String("url", "http://localhost/blk-1m", "") + threadNum = flag.Int("thread-num", 1, "") + count = flag.Int("count", 1, "") +) + +type BenchmarkResult struct { + Url string `json:"url"` + Count int `json:"count"` + TotalElapsedSecond float64 `json:"totalElapsedSecond"` + TotalSize int64 `json:"totalSize"` +} + +func main() { + flag.Parse() + + //fmt.Printf("url = %s\n", *url) + //fmt.Printf("thread-num = %d\n", *threadNum) + //fmt.Printf("count = %d\n", *count) + + // disable connection pool + http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = -1 + + resultsChan := make(chan BenchmarkResult, *count) + + for i := 0; i < *threadNum; i++ { + go bench(*url, *count, resultsChan) + } + + results := []BenchmarkResult{} + for i := 0; i < *threadNum; i++ { + r := <-resultsChan + results = append(results, r) + } + + res, err := json.Marshal(results) + if err != nil { + fmt.Printf("failed Marshal err=%q", err) + panic("error") + } + fmt.Fprintln(os.Stdout, string(res)) +} + +func bench(url string, count int, resultChan chan 
BenchmarkResult) { + result := BenchmarkResult{ + Url: url, + Count: count, + TotalElapsedSecond: 0, + TotalSize: 0, + } + + for i := 0; i < count; i++ { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + fmt.Printf("failed NewRequest err=%q", err) + panic("error") + } + for { + start := time.Now() + resp, err := http.DefaultClient.Do(req) + if err != nil { + fmt.Printf("failed Do err=%q, retrying... %d/%d", err, i, count) + time.Sleep(100 * time.Millisecond) + continue + } + + if resp.StatusCode != 200 { + fmt.Printf("unexpected status code %d", resp.StatusCode) + panic("error") + } else { + var buffer bytes.Buffer + + writtenSize, err := io.Copy(&buffer, resp.Body) + if err != nil { + fmt.Printf("failed Copy() err=%q", err) + panic("error") + } + end := time.Now() + elapsed := end.Sub(start).Seconds() + result.TotalSize += writtenSize + result.TotalElapsedSecond += elapsed + } + resp.Body.Close() + break + } + } + + resultChan <- result +} diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh new file mode 100755 index 0000000..2717556 --- /dev/null +++ b/benchmark/block/block.sh @@ -0,0 +1,123 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + +IMAGE_NAME="block" +COUNT="10" + +source ~/.profile + +./gen_blocks.sh + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit + +nerdctl build -f ./Dockerfile -t $IMAGE_NAME . 
+ +BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + set -ex + + nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + SERVER_IP=$(nerdctl exec block-server hostname -i) + LOG_NAME="block-wo-b4ns-direct.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + set -ex + + nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + SERVER_IP=$(hostname -I | awk '{print $1}') + LOG_NAME="block-wo-b4ns-host.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep 
infinity + SERVER_IP=$(hostname -I | awk '{print $1}') + LOG_NAME="block-w-b4ns.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + HOST_IP=$(hostname -I | awk '{print $1}') + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity + SERVER_IP=$(nerdctl exec block-server hostname -i) + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + done + + nerdctl rm -f block-server + nerdctl rm -f block-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) + diff --git a/benchmark/block/block_multinode.sh b/benchmark/block/block_multinode.sh new file mode 100755 index 0000000..c159db7 --- /dev/null +++ b/benchmark/block/block_multinode.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +cd $(dirname $0) +. 
../../util.sh + +set +e +NAME="test" exec_lxc nerdctl rm -f block-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +IMAGE_NAME="block" +COUNT="10" +BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') + +set -eux -o pipefail + +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/block && nerdctl build -f ./Dockerfile -t $IMAGE_NAME ." + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +NAME="test" exec_lxc /home/ubuntu/bypass4netns/benchmark/block/gen_blocks.sh + +echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name block-client $IMAGE_NAME sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + LOG_NAME="block-multinode-wo-b4ns.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc 
/bin/bash -c "nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$TEST1_VXLAN_ADDR/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc nerdctl rm -f block-server + NAME="test2" exec_lxc nerdctl rm -f block-client +) + +echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 8080:80 -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec block-server hostname -i) + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity" + LOG_NAME="block-multinode-w-b4ns.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc /bin/bash -c "nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc nerdctl rm -f block-server + NAME="test2" exec_lxc nerdctl rm -f block-client +) diff --git a/benchmark/block/block_plot.py b/benchmark/block/block_plot.py new file mode 100644 index 0000000..d303b1a --- /dev/null +++ b/benchmark/block/block_plot.py @@ -0,0 +1,41 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.25 + +def 
load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + # only single thread + l_json = json.loads(line)[0] + l_json["th_gbps"] = l_json["totalSize"] * 8 / l_json["totalElapsedSecond"] / 1024 / 1024 / 1024 + file = l_json["url"].split("/")[3] + data[file] = l_json + line = f.readline() + return data + +labels=['blk-1k', 'blk-32k', 'blk-512k', 'blk-1m', 'blk-32m', 'blk-128m', 'blk-512m', 'blk-1g'] + +plt.ylabel("Gbps") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + value = [] + for l in labels: + value.append(data[l]["th_gbps"]) + plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +plt.legend() +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) + diff --git a/benchmark/block/gen_blocks.sh b/benchmark/block/gen_blocks.sh new file mode 100755 index 0000000..9aa0d7b --- /dev/null +++ b/benchmark/block/gen_blocks.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +cd $(dirname $0) + +BLOCK_SIZES=(1 32 128 512) + +for BLOCK_SIZE in ${BLOCK_SIZES[@]} +do + dd if=/dev/urandom of=blk-${BLOCK_SIZE}k bs=1024 count=$BLOCK_SIZE +done + +for BLOCK_SIZE in ${BLOCK_SIZES[@]} +do + dd if=/dev/urandom of=blk-${BLOCK_SIZE}m bs=1048576 count=$BLOCK_SIZE +done + +dd if=/dev/urandom of=blk-1g bs=1048576 count=1024 + From b7dc9ff4409eac339469d232222f0d6c1c2475a0 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 30 Nov 2023 17:31:54 +0000 Subject: [PATCH 42/55] add static linked binary bypassing test Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 2 +- test/DockerfileHttpServer | 13 ++++++++ test/httpserver.go | 66 +++++++++++++++++++++++++++++++++++++ test/run_test.sh | 37 ++++++++++++++++++++- 4 files 
changed, 116 insertions(+), 2 deletions(-) create mode 100644 test/DockerfileHttpServer create mode 100644 test/httpserver.go diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7c13e30..cceee8d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -89,7 +89,7 @@ jobs: - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run test - run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh" + run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh SYNC" # some source codes may be updated. re-export new image. - name: export image run: sudo lxc image alias delete test-export && rm -f /tmp/test-image.tar.zst && ./export_lxc_image.sh test diff --git a/test/DockerfileHttpServer b/test/DockerfileHttpServer new file mode 100644 index 0000000..ffae7c1 --- /dev/null +++ b/test/DockerfileHttpServer @@ -0,0 +1,13 @@ +FROM golang:1.21.3 as bench-builder + +COPY httpserver.go . 
+# static link +RUN CGO_ENABLED=0 go build -o /httpserver httpserver.go + +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +COPY --from=bench-builder /httpserver /httpserver + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/test/httpserver.go b/test/httpserver.go new file mode 100644 index 0000000..6957279 --- /dev/null +++ b/test/httpserver.go @@ -0,0 +1,66 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "log" + "net/http" +) + +var ( + url = flag.String("url", "http://localhost/blk-1m", "") + mode = flag.String("mode", "server", "") +) + +func main() { + flag.Parse() + + // disable connection pool + http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = -1 + + if *mode == "server" { + fmt.Println("starting server") + server() + } else if *mode == "client" { + err := client(*url) + if err != nil { + log.Fatal(err) + } + } +} + +func server() { + http.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "pong") + }) + + log.Fatal(http.ListenAndServe(":8080", nil)) +} + +func client(url string) error { + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("failed Do err=%q", err) + } + + if resp.StatusCode != 200 { + return fmt.Errorf("unexpected status code %d", resp.StatusCode) + } else { + var buffer bytes.Buffer + + _, err = io.Copy(&buffer, resp.Body) + if err != nil { + return fmt.Errorf("failed Copy() err=%q", err) + } + + fmt.Printf("resp=%s\n", buffer.String()) + } + err = resp.Body.Close() + if err != nil { + return fmt.Errorf("failed Close() err=%q", err) + } + + return nil +} diff --git a/test/run_test.sh b/test/run_test.sh index daedcb5..38cf2ac 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -9,7 +9,7 @@ nerdctl pull --quiet "${ALPINE_IMAGE}" SCRIPT_DIR=$(cd $(dirname $0); pwd) set +u -if [ ! 
-v 1 ]; then +if [ "$1" == "SYNC" ]; then echo "updating source code" rm -rf ~/bypass4netns sudo cp -r /host ~/bypass4netns @@ -30,12 +30,47 @@ set +e systemctl --user stop run-iperf3 systemctl --user reset-failed sleep 1 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 set -e systemd-run --user --unit run-iperf3 iperf3 -s HOST_IP=$(hostname -I | awk '{print $1}') ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json +echo "===== static linked binary test =====" +( + set +e + systemctl --user stop run-bypass4netns-static + nerdctl rm -f test1 + nerdctl rm -f test2 + systemctl --user reset-failed + set -ex + + IMAGE_NAME="b4ns:static" + nerdctl build -f ./DockerfileHttpServer -t $IMAGE_NAME . + + systemd-run --user --unit run-bypass4netns-static bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" + sleep 1 + nerdctl run -d -p 8081:8080 --name test1 $IMAGE_NAME /httpserver -mode server + nerdctl run --security-opt seccomp=/tmp/seccomp.json -d --name test2 $IMAGE_NAME sleep infinity + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + nerdctl exec test2 /httpserver -mode client -url http://$HOST_IP:8081/ping + + COUNT=$(journalctl --user -u run-bypass4netns-static.service | grep 'bypassed connect socket' | wc -l) + if [ $COUNT != 3 ]; then + echo "static linked binary bypassing not working correctly." 
+ exit 1 + fi + + nerdctl rm -f test1 + nerdctl rm -f test2 + systemctl --user stop run-bypass4netns-static +) + echo "===== '--ignore' option test =====" ( set +e From 6724783af799288a912c7a64ad49bee504817490 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 1 Dec 2023 05:54:40 +0000 Subject: [PATCH 43/55] enable lxc privileged mode to run rootful nerdctl Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 6 ------ launch_test_lxc.sh | 2 +- test/init_test.sh | 12 ------------ 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index cceee8d..aeeb17e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -86,8 +86,6 @@ jobs: run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container run: ./launch_test_lxc.sh test-export - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run test run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh SYNC" # some source codes may be updated. re-export new image. 
@@ -121,8 +119,6 @@ jobs: run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container run: ./launch_test_lxc.sh test-export - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run benchmark (${{ matrix.script }}) run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" - name: upload plot @@ -161,8 +157,6 @@ jobs: run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container run: ./launch_test_lxc.sh test-export - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user start containerd-fuse-overlayfs.service - - run: sudo lxc exec test -- sudo --login --user ubuntu systemctl --user status --no-pager containerd-fuse-overlayfs.service - name: run benchmark (${{ matrix.script }}) run: ./benchmark/${{ matrix.script }}_multinode.sh - name: upload plot diff --git a/launch_test_lxc.sh b/launch_test_lxc.sh index 30f7141..92d6903 100755 --- a/launch_test_lxc.sh +++ b/launch_test_lxc.sh @@ -6,7 +6,7 @@ IMAGE=${1:-"images:ubuntu/22.04"} cd $(dirname $0) # lxd init --auto --storage-backend=btrfs -sudo lxc launch -c security.nesting=true $IMAGE test +sudo lxc launch -c security.privileged=true -c security.nesting=true $IMAGE test sudo lxc config device add test share disk source=$(pwd) path=/host sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" # let user services running diff --git a/test/init_test.sh b/test/init_test.sh index 8867ad3..4fa2ee8 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -35,18 +35,6 @@ echo "===== Prepare =====" containerd-rootless-setuptool.sh install containerd-rootless-setuptool.sh install-buildkit - 
containerd-rootless-setuptool.sh install-fuse-overlayfs - cat << EOF >> /home/$TEST_USER/.config/containerd/config.toml -[proxy_plugins] - [proxy_plugins."fuse-overlayfs"] - type = "snapshot" - address = "/run/user/1000/containerd-fuse-overlayfs.sock" -EOF - - systemctl restart --user containerd - echo 'export CONTAINERD_SNAPSHOTTER="fuse-overlayfs"' >> ~/.profile - source ~/.profile - # build nerdctl with bypass4netns curl -fsSL https://github.com/containerd/nerdctl/archive/refs/tags/v${NERDCTL_VERSION}.tar.gz | tar Cxz ~/ cd ~/nerdctl-${NERDCTL_VERSION} From eb7be2ab0205d2a2e14970593d0f2ff133a2af96 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 1 Dec 2023 06:29:17 +0000 Subject: [PATCH 44/55] run rootful container test Signed-off-by: Naoki MATSUMOTO --- benchmark/block/block.sh | 10 ++++------ benchmark/iperf3/iperf3.sh | 10 ++++------ benchmark/iperf3/iperf3_host.sh | 2 +- benchmark/postgres/postgres.sh | 16 +++++++--------- benchmark/redis/redis.sh | 10 ++++------ test/init_test.sh | 3 +++ test/run_test.sh | 16 ++++++++++++++-- 7 files changed, 37 insertions(+), 30 deletions(-) diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh index 2717556..0de6a02 100755 --- a/benchmark/block/block.sh +++ b/benchmark/block/block.sh @@ -23,6 +23,7 @@ systemctl --user status --no-pager buildkit nerdctl build -f ./Dockerfile -t $IMAGE_NAME . BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( @@ -54,12 +55,11 @@ echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" nerdctl run -d --name block-client $IMAGE_NAME sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') LOG_NAME="block-wo-b4ns-host.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME done nerdctl rm -f block-server @@ -79,12 +79,11 @@ echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') LOG_NAME="block-w-b4ns.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME done nerdctl rm -f block-server @@ -102,8 +101,7 @@ echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) wit systemctl --user reset-failed set -ex - HOST_IP=$(hostname -I | awk '{print $1}') - systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 
--advertise-client-urls http://$HOST_IP:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" diff --git a/benchmark/iperf3/iperf3.sh b/benchmark/iperf3/iperf3.sh index c8dac8a..35ae79c 100755 --- a/benchmark/iperf3/iperf3.sh +++ b/benchmark/iperf3/iperf3.sh @@ -9,6 +9,7 @@ ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" source ~/.profile nerdctl pull --quiet $ALPINE_IMAGE +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( @@ -52,9 +53,8 @@ echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-host.log + nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-wo-b4ns-host.log nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client @@ -81,9 +81,8 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) vi systemd-run --user --unit iperf3-server nerdctl exec iperf3-server iperf3 -s - SERVER_IP=$(hostname -I | awk '{print $1}') sleep 1 - nerdctl exec iperf3-client iperf3 -c $SERVER_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.log + nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-w-b4ns.log nerdctl rm -f iperf3-server nerdctl rm -f iperf3-client @@ -103,8 +102,7 @@ echo "===== Benchmark: iperf3 client(w/ bypass4netns) server(w/ bypass4netns) wi systemctl 
--user reset-failed set -ex - HOST_IP=$(hostname -I | awk '{print $1}') - systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity diff --git a/benchmark/iperf3/iperf3_host.sh b/benchmark/iperf3/iperf3_host.sh index dd1a80b..464f1b2 100755 --- a/benchmark/iperf3/iperf3_host.sh +++ b/benchmark/iperf3/iperf3_host.sh @@ -7,7 +7,7 @@ source ~/.profile ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" nerdctl pull --quiet "${ALPINE_IMAGE}" -HOST_IP=$(hostname -I | awk '{print $1}') +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) systemd-run --user --unit run-iperf3 iperf3 -s echo "===== Benchmark: netns -> host With bypass4netns =====" diff --git a/benchmark/postgres/postgres.sh b/benchmark/postgres/postgres.sh index a9ff8a3..e860290 100755 --- a/benchmark/postgres/postgres.sh +++ b/benchmark/postgres/postgres.sh @@ -10,6 +10,7 @@ cd $(dirname $0) . ../../util.sh nerdctl pull --quiet $POSTGRES_IMAGE +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( @@ -39,10 +40,9 @@ echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4net nerdctl run -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') sleep 5 - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-host.log + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-wo-b4ns-host.log nerdctl rm -f psql-server nerdctl rm -f psql-client @@ -61,11 +61,10 @@ echo "===== Benchmark: postgresql client(w/ bypass4netns) server(w/ bypass4netns nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE nerdctl run --label nerdctl/bypass4netns=true -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') PID=$(nerdctl inspect psql-client | jq '.[0].State.Pid') - NAME="psql-client" exec_netns /bin/bash -c "until nc -z $SERVER_IP 15432; do sleep 1; done" - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -i postgres - nerdctl exec psql-client pgbench -h $SERVER_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-w-b4ns.log + NAME="psql-client" exec_netns /bin/bash -c "until nc -z $HOST_IP 15432; do sleep 1; done" + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-w-b4ns.log nerdctl rm -f 
psql-server nerdctl rm -f psql-client @@ -82,8 +81,7 @@ echo "===== Benchmark: postgres client(w/ bypass4netns) server(w/ bypass4netns) systemctl --user reset-failed set -ex - HOST_IP=$(hostname -I | awk '{print $1}') - systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE diff --git a/benchmark/redis/redis.sh b/benchmark/redis/redis.sh index 8d48f6d..9fdb66b 100755 --- a/benchmark/redis/redis.sh +++ b/benchmark/redis/redis.sh @@ -8,6 +8,7 @@ REDIS_IMAGE="redis:${REDIS_VERSION}" source ~/.profile +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) nerdctl pull --quiet $REDIS_IMAGE echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" @@ -36,8 +37,7 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-wo-b4ns-host.log + nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-wo-b4ns-host.log cat redis-wo-b4ns-host.log nerdctl rm -f redis-server @@ -57,8 +57,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE nerdctl run --label nerdctl/bypass4netns=true -d --name redis-client $REDIS_IMAGE sleep infinity - SERVER_IP=$(hostname -I | awk '{print $1}') - nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP -p 6380 --csv > redis-w-b4ns.log + nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-w-b4ns.log cat redis-w-b4ns.log nerdctl rm -f redis-server @@ -76,8 +75,7 @@ echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) wit systemctl --user reset-failed set -ex - HOST_IP=$(hostname -I | awk '{print $1}') - systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP nerdctl run --label nerdctl/bypass4netns=true -d -p 6380:6379 --name redis-server $REDIS_IMAGE diff 
--git a/test/init_test.sh b/test/init_test.sh index 4fa2ee8..64c5f15 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -48,4 +48,7 @@ echo "===== Prepare =====" make sudo rm -f /usr/local/bin/bypass4netns* sudo make install + + # also enable rootful containerd for rootful container testing + sudo systemctl enable --now containerd ) diff --git a/test/run_test.sh b/test/run_test.sh index 38cf2ac..c85a739 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -37,9 +37,22 @@ sleep 3 set -e systemd-run --user --unit run-iperf3 iperf3 -s -HOST_IP=$(hostname -I | awk '{print $1}') +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json +echo "===== rootful mode ====" +( + set +e + sudo nerdctl rm -f test + set -ex + + sudo nerdctl run -d --name test $ALPINE_IMAGE sleep infinity + sudo nerdctl exec test apk add --no-cache iperf3 + sudo nerdctl exec test iperf3 -c $HOST_IP -t 1 --connect-timeout 1000 # it must succeed in connecting. 
+ + sudo nerdctl rm -f test +) + echo "===== static linked binary test =====" ( set +e @@ -191,7 +204,6 @@ echo "===== multinode test (single node) ====" systemctl --user reset-failed set -ex - HOST_IP=$(hostname -I | sed 's/ //') systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://${HOST_IP}:2379 --advertise-client-urls http://${HOST_IP}:2379 systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP --debug sleep 1 From 4b15d23c120b8caebfd615e9227a189998996116 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 1 Dec 2023 07:20:02 +0000 Subject: [PATCH 45/55] add rootful container benchmarks Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 11 +++-- benchmark/block/block.sh | 42 +++++++++++++++++++ benchmark/block/block_multinode.sh | 19 +++++++++ benchmark/iperf3/iperf3.sh | 51 ++++++++++++++++++++++++ benchmark/iperf3/iperf3_multinode.sh | 19 +++++++++ benchmark/postgres/postgres.sh | 36 +++++++++++++++++ benchmark/postgres/postgres_multinode.sh | 16 ++++++++ benchmark/redis/redis.sh | 34 ++++++++++++++++ benchmark/redis/redis_multinode.sh | 15 ++++++- test/init_test.sh | 1 + util.sh | 6 ++- 11 files changed, 244 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aeeb17e..08d91d1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -126,6 +126,8 @@ jobs: if: matrix.script != 'iperf3/iperf3_host' run: | mkdir /tmp/benchmark-results + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-rootful-direct.log /tmp/benchmark-results/. + sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-rootful-host.log /tmp/benchmark-results/. sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-direct.log /tmp/benchmark-results/. 
sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-wo-b4ns-host.log /tmp/benchmark-results/. sudo lxc file pull test/home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}-w-b4ns.log /tmp/benchmark-results/. @@ -162,6 +164,7 @@ jobs: - name: upload plot run: | mkdir /tmp/benchmark-results + cp benchmark/${{ matrix.script }}-multinode-rootful.log /tmp/benchmark-results/. cp benchmark/${{ matrix.script }}-multinode-wo-b4ns.log /tmp/benchmark-results/. cp benchmark/${{ matrix.script }}-multinode-w-b4ns.log /tmp/benchmark-results/. - uses: actions/upload-artifact@v3 @@ -181,10 +184,10 @@ jobs: name: benchmark-results path: ./ - run: mkdir /tmp/benchmark-plots - - run: python3 benchmark/redis/redis_plot.py redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png - - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png - - run: python3 benchmark/postgres/postgres_plot.py postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png - - run: python3 benchmark/block/block_plot.py block-wo-b4ns-direct.log block-wo-b4ns-host.log block-multinode-wo-b4ns.log block-w-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block.png + - run: python3 benchmark/redis/redis_plot.py redis-rootful-direct.log redis-rootful-host.log redis-multinode-rootful.log redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-rootful-direct.log iperf3-rootful-host.log iperf3-multinode-rootful.log iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log 
iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-rootful-direct.log postgres-rootful-host.log postgres-multinode-rootful.log postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png + - run: python3 benchmark/block/block_plot.py block-rootful-direct.log block-rootful-host.log block-multinode-rootful.log block-wo-b4ns-direct.log block-wo-b4ns-host.log block-multinode-wo-b4ns.log block-w-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh index 0de6a02..d12042b 100755 --- a/benchmark/block/block.sh +++ b/benchmark/block/block.sh @@ -20,11 +20,53 @@ sleep 3 systemctl --user status --no-pager containerd systemctl --user status --no-pager buildkit +sudo nerdctl build -f ./Dockerfile -t $IMAGE_NAME . nerdctl build -f ./Dockerfile -t $IMAGE_NAME . BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) +echo "===== Benchmark: block rooful via NetNS =====" +( + set +e + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client + set -ex + + sudo nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + SERVER_IP=$(sudo nerdctl exec block-server hostname -i) + LOG_NAME="block-rootful-direct.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + done + + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client +) + +echo "===== Benchmark: block rootful via host =====" +( + set +e + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client + set -ex + + sudo nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" + sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + LOG_NAME="block-rootful-host.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + done + + sudo nerdctl rm -f block-server + sudo nerdctl rm -f block-client +) + echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( set +e diff --git a/benchmark/block/block_multinode.sh b/benchmark/block/block_multinode.sh index c159db7..be7fe9b 100755 --- a/benchmark/block/block_multinode.sh +++ b/benchmark/block/block_multinode.sh @@ -4,6 +4,7 @@ cd $(dirname $0) . 
../../util.sh set +e +NAME="test" exec_lxc sudo nerdctl rm -f block-server NAME="test" exec_lxc nerdctl rm -f block-server sudo lxc rm -f test2 @@ -23,6 +24,7 @@ NAME="test" exec_lxc systemctl --user restart buildkit sleep 3 NAME="test" exec_lxc systemctl --user status --no-pager containerd NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/block && sudo nerdctl build -f ./Dockerfile -t $IMAGE_NAME ." NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/block && nerdctl build -f ./Dockerfile -t $IMAGE_NAME ." sudo lxc stop test @@ -36,6 +38,23 @@ TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') NAME="test" exec_lxc /home/ubuntu/bypass4netns/benchmark/block/gen_blocks.sh +echo "===== Benchmark: block rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name block-client $IMAGE_NAME sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh block-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + LOG_NAME="block-multinode-rootful.log" + rm -f $LOG_NAME + for BLOCK_SIZE in ${BLOCK_SIZES[@]} + do + NAME="test2" exec_lxc /bin/bash -c "sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$TEST1_VXLAN_ADDR/blk-$BLOCK_SIZE" >> $LOG_NAME + done + + NAME="test" exec_lxc sudo nerdctl rm -f block-server + NAME="test2" exec_lxc sudo nerdctl rm -f block-client +) + echo "===== Benchmark: block 
client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" ( NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name block-server -v /home/ubuntu/bypass4netns/benchmark/block:/var/www/html:ro $IMAGE_NAME nginx -g \"daemon off;\"" diff --git a/benchmark/iperf3/iperf3.sh b/benchmark/iperf3/iperf3.sh index 35ae79c..ae5a9f5 100755 --- a/benchmark/iperf3/iperf3.sh +++ b/benchmark/iperf3/iperf3.sh @@ -8,9 +8,60 @@ ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" source ~/.profile +sudo nerdctl pull --quiet $ALPINE_IMAGE nerdctl pull --quiet $ALPINE_IMAGE HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) +echo "===== Benchmark: iperf3 rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + set -ex + + sudo nerdctl run -d --name iperf3-server $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + sudo nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + + SERVER_IP=$(sudo nerdctl exec iperf3-server hostname -i) + sleep 1 + sudo nerdctl exec iperf3-client iperf3 -c $SERVER_IP -i 0 --connect-timeout 1000 -J > iperf3-rootful-direct.log + + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + +echo "===== Benchmark: iperf3 rootful via host =====" +( + set +e + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed + set -ex + + sudo nerdctl run -d --name iperf3-server -p 5202:5201 $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + sudo 
nerdctl run -d --name iperf3-client $ALPINE_IMAGE sleep infinity + sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + + sleep 1 + sudo nerdctl exec iperf3-client iperf3 -c $HOST_IP -p 5202 -i 0 --connect-timeout 1000 -J > iperf3-rootful-host.log + + sudo nerdctl rm -f iperf3-server + sudo nerdctl rm -f iperf3-client + systemctl --user stop iperf3-server + systemctl --user reset-failed +) + echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( set +e diff --git a/benchmark/iperf3/iperf3_multinode.sh b/benchmark/iperf3/iperf3_multinode.sh index ac32784..d8c5272 100755 --- a/benchmark/iperf3/iperf3_multinode.sh +++ b/benchmark/iperf3/iperf3_multinode.sh @@ -4,6 +4,7 @@ cd $(dirname $0) . ../../util.sh set +e +NAME="test" exec_lxc sudo nerdctl rm -f iperf3-server NAME="test" exec_lxc nerdctl rm -f iperf3-server sudo lxc rm -f test2 @@ -15,6 +16,7 @@ ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" set -eux -o pipefail +NAME="test" exec_lxc sudo nerdctl pull --quiet $ALPINE_IMAGE NAME="test" exec_lxc nerdctl pull --quiet $ALPINE_IMAGE sudo lxc stop test @@ -26,6 +28,23 @@ sleep 5 TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') +echo "===== Benchmark: iperf3 rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-server $ALPINE_IMAGE sleep infinity" + NAME="test" exec_lxc sudo nerdctl exec iperf3-server apk add --no-cache iperf3 + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-client $ALPINE_IMAGE sleep 
infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh iperf3-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc sudo nerdctl exec iperf3-client apk add --no-cache iperf3 + + NAME="test" exec_lxc systemd-run --user --unit iperf3-server sudo nerdctl exec iperf3-server iperf3 -s + NAME="test2" exec_lxc sudo nerdctl exec iperf3-client iperf3 -c $TEST1_VXLAN_ADDR -i 0 --connect-timeout 1000 -J > iperf3-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f iperf3-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc sudo nerdctl rm -f iperf3-client +) + echo "===== Benchmark: iperf3 client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" ( NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name iperf3-server $ALPINE_IMAGE sleep infinity" diff --git a/benchmark/postgres/postgres.sh b/benchmark/postgres/postgres.sh index e860290..4d3b79f 100755 --- a/benchmark/postgres/postgres.sh +++ b/benchmark/postgres/postgres.sh @@ -9,9 +9,45 @@ source ~/.profile cd $(dirname $0) . ../../util.sh +sudo nerdctl pull --quiet $POSTGRES_IMAGE nerdctl pull --quiet $POSTGRES_IMAGE HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) +echo "===== Benchmark: postgresql rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client + set -ex + + sudo nerdctl run -d --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + sudo nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + SERVER_IP=$(sudo nerdctl exec psql-server hostname -i) + sleep 5 + sudo nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -i postgres + sudo nerdctl exec psql-client pgbench -h $SERVER_IP -U postgres -s 10 -t 1000 postgres > postgres-rootful-direct.log + + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client +) + +echo "===== Benchmark: postgresql rootful via host =====" +( + set +e + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client + set -ex + + sudo nerdctl run -d -p 15432:5432 --name psql-server -e POSTGRES_PASSWORD=pass $POSTGRES_IMAGE + sudo nerdctl run -d --name psql-client -e PGPASSWORD=pass $POSTGRES_IMAGE sleep infinity + sleep 5 + sudo nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -i postgres + sudo nerdctl exec psql-client pgbench -h $HOST_IP -p 15432 -U postgres -s 10 -t 1000 postgres > postgres-rootful-host.log + + sudo nerdctl rm -f psql-server + sudo nerdctl rm -f psql-client +) + echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( set +e diff --git a/benchmark/postgres/postgres_multinode.sh b/benchmark/postgres/postgres_multinode.sh index b0abdd8..503af07 100755 --- a/benchmark/postgres/postgres_multinode.sh +++ b/benchmark/postgres/postgres_multinode.sh @@ -4,6 +4,7 @@ cd $(dirname $0) . 
../../util.sh set +e +NAME="test" exec_lxc sudo nerdctl rm -f psql-server NAME="test" exec_lxc nerdctl rm -f psql-server sudo lxc rm -f test2 @@ -16,6 +17,7 @@ POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" set -eux -o pipefail +NAME="test" exec_lxc sudo nerdctl pull --quiet $POSTGRES_IMAGE NAME="test" exec_lxc nerdctl pull --quiet $POSTGRES_IMAGE sudo lxc stop test @@ -27,6 +29,20 @@ sleep 5 TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') +echo "===== Benchmark: postgresql rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name psql-server -e POSTGRES_PASSWORD=pass -d $POSTGRES_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name psql-client -e PGPASSWORD=pass -d $POSTGRES_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh psql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + NAME="test2" exec_lxc sudo nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -i postgres + NAME="test2" exec_lxc sudo nerdctl exec psql-client pgbench -h $TEST1_VXLAN_ADDR -U postgres -s 10 -t 1000 postgres > postgres-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f psql-server + NAME="test2" exec_lxc sudo nerdctl rm -f psql-client +) + echo "===== Benchmark: postgresql client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" ( NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name psql-server -e POSTGRES_PASSWORD=pass -d $POSTGRES_IMAGE" diff --git a/benchmark/redis/redis.sh b/benchmark/redis/redis.sh index 
9fdb66b..a71a3b0 100755 --- a/benchmark/redis/redis.sh +++ b/benchmark/redis/redis.sh @@ -9,8 +9,42 @@ REDIS_IMAGE="redis:${REDIS_VERSION}" source ~/.profile HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) +sudo nerdctl pull --quiet $REDIS_IMAGE nerdctl pull --quiet $REDIS_IMAGE +echo "===== Benchmark: redis rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client + set -ex + + sudo nerdctl run -d --name redis-server "${REDIS_IMAGE}" + sudo nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + SERVER_IP=$(sudo nerdctl exec redis-server hostname -i) + sudo nerdctl exec redis-client redis-benchmark -q -h $SERVER_IP --csv > redis-rootful-direct.log + cat redis-rootful-direct.log + + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client +) + +echo "===== Benchmark: redis rootful via host =====" +( + set +e + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client + set -ex + + sudo nerdctl run -d -p 6380:6379 --name redis-server "${REDIS_IMAGE}" + sudo nerdctl run -d --name redis-client "${REDIS_IMAGE}" sleep infinity + sudo nerdctl exec redis-client redis-benchmark -q -h $HOST_IP -p 6380 --csv > redis-rootful-host.log + cat redis-rootful-host.log + + sudo nerdctl rm -f redis-server + sudo nerdctl rm -f redis-client +) + echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" ( set +e diff --git a/benchmark/redis/redis_multinode.sh b/benchmark/redis/redis_multinode.sh index 1c71aa1..06ecf71 100755 --- a/benchmark/redis/redis_multinode.sh +++ b/benchmark/redis/redis_multinode.sh @@ -4,6 +4,7 @@ cd $(dirname $0) . 
../../util.sh set +e +NAME="test" exec_lxc sudo nerdctl rm -f redis-server NAME="test" exec_lxc nerdctl rm -f redis-server sudo lxc rm -f test2 @@ -16,6 +17,7 @@ REDIS_IMAGE="redis:${REDIS_VERSION}" set -eux -o pipefail +NAME="test" exec_lxc sudo nerdctl pull --quiet $REDIS_IMAGE NAME="test" exec_lxc nerdctl pull --quiet $REDIS_IMAGE sudo lxc stop test @@ -27,6 +29,18 @@ sleep 5 TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') +echo "===== Benchmark: redis rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name redis-server -d $REDIS_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name redis-client -d $REDIS_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh redis-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + NAME="test2" exec_lxc sudo nerdctl exec redis-client redis-benchmark -q -h $TEST1_VXLAN_ADDR --csv > redis-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f redis-server + NAME="test2" exec_lxc sudo nerdctl rm -f redis-client +) + echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" ( NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name redis-server -d $REDIS_IMAGE" @@ -39,7 +53,6 @@ echo "===== Benchmark: redis client(w/o bypass4netns) server(w/o bypass4netns) w NAME="test2" exec_lxc nerdctl rm -f redis-client ) - echo "===== Benchmark: redis client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" ( NAME="test" exec_lxc systemd-run --user --unit etcd.service 
/usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 diff --git a/test/init_test.sh b/test/init_test.sh index 64c5f15..87dddcd 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -51,4 +51,5 @@ echo "===== Prepare =====" # also enable rootful containerd for rootful container testing sudo systemctl enable --now containerd + sudo systemctl enable --now buildkit ) diff --git a/util.sh b/util.sh index 268f8c4..051c0cb 100644 --- a/util.sh +++ b/util.sh @@ -3,7 +3,11 @@ set -eu -o pipefail function exec_netns() { - nsenter -t $PID -F -U --preserve-credentials -n -- "$@" + if [ $EUID -eq 0 ]; then + nsenter -t $PID -F -n -- "$@" + else + nsenter -t $PID -F -U --preserve-credentials -n -- "$@" + fi } function exec_lxc() { From d33f3d6eeb157bab8f7cdd1c30f6de45586d7186 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 1 Dec 2023 07:57:34 +0000 Subject: [PATCH 46/55] split plots Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 08d91d1..4db6e73 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -184,10 +184,14 @@ jobs: name: benchmark-results path: ./ - run: mkdir /tmp/benchmark-plots - - run: python3 benchmark/redis/redis_plot.py redis-rootful-direct.log redis-rootful-host.log redis-multinode-rootful.log redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-multinode-wo-b4ns.log redis-w-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis.png - - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-rootful-direct.log iperf3-rootful-host.log iperf3-multinode-rootful.log iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-multinode-wo-b4ns.log iperf3-w-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3.png - - run: python3 benchmark/postgres/postgres_plot.py postgres-rootful-direct.log 
postgres-rootful-host.log postgres-multinode-rootful.log postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-multinode-wo-b4ns.log postgres-w-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres.png - - run: python3 benchmark/block/block_plot.py block-rootful-direct.log block-rootful-host.log block-multinode-rootful.log block-wo-b4ns-direct.log block-wo-b4ns-host.log block-multinode-wo-b4ns.log block-w-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block.png + - run: python3 benchmark/redis/redis_plot.py redis-rootful-direct.log redis-rootful-host.log redis-wo-b4ns-direct.log redis-wo-b4ns-host.log redis-w-b4ns.log /tmp/benchmark-plots/redis.png + - run: python3 benchmark/redis/redis_plot.py redis-multinode-rootful.log redis-multinode-wo-b4ns.log redis-multinode-w-b4ns.log /tmp/benchmark-plots/redis-multinode.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-rootful-direct.log iperf3-rootful-host.log iperf3-wo-b4ns-direct.log iperf3-wo-b4ns-host.log iperf3-w-b4ns.log /tmp/benchmark-plots/iperf3.png + - run: python3 benchmark/iperf3/iperf3_plot.py iperf3-multinode-rootful.log iperf3-multinode-wo-b4ns.log iperf3-multinode-w-b4ns.log /tmp/benchmark-plots/iperf3-multinode.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-rootful-direct.log postgres-rootful-host.log postgres-wo-b4ns-direct.log postgres-wo-b4ns-host.log postgres-w-b4ns.log /tmp/benchmark-plots/postgres.png + - run: python3 benchmark/postgres/postgres_plot.py postgres-multinode-rootful.log postgres-multinode-wo-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres-multinode.png + - run: python3 benchmark/block/block_plot.py block-rootful-direct.log block-rootful-host.log block-wo-b4ns-direct.log block-wo-b4ns-host.log block-w-b4ns.log /tmp/benchmark-plots/block.png + - run: python3 benchmark/block/block_plot.py block-multinode-rootful.log block-multinode-wo-b4ns.log block-multinode-w-b4ns.log 
/tmp/benchmark-plots/block-multinode.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots From be42b62727da1dbcc6f76a21b058921b19cf2ae6 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sun, 3 Dec 2023 10:14:37 +0000 Subject: [PATCH 47/55] add memcached benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 6 +- benchmark/memcached/memcached.sh | 135 +++++++++++++++++++++ benchmark/memcached/memcached_multinode.sh | 81 +++++++++++++ benchmark/memcached/memcached_plot.py | 55 +++++++++ 4 files changed, 275 insertions(+), 2 deletions(-) create mode 100755 benchmark/memcached/memcached.sh create mode 100755 benchmark/memcached/memcached_multinode.sh create mode 100644 benchmark/memcached/memcached_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4db6e73..d424eac 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -144,7 +144,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -192,6 +192,8 @@ jobs: - run: python3 benchmark/postgres/postgres_plot.py postgres-multinode-rootful.log postgres-multinode-wo-b4ns.log postgres-multinode-w-b4ns.log /tmp/benchmark-plots/postgres-multinode.png - run: python3 benchmark/block/block_plot.py block-rootful-direct.log block-rootful-host.log block-wo-b4ns-direct.log block-wo-b4ns-host.log block-w-b4ns.log /tmp/benchmark-plots/block.png - run: python3 
benchmark/block/block_plot.py block-multinode-rootful.log block-multinode-wo-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block-multinode.png + - run: python3 benchmark/memcached/memcached_plot.py memcached-rootful-direct.log memcached-rootful-host.log memcached-wo-b4ns-direct.log memcached-wo-b4ns-host.log memcached-w-b4ns.log /tmp/benchmark-plots/memcached.png + - run: python3 benchmark/memcached/memcached_plot.py memcached-multinode-rootful.log memcached-multinode-wo-b4ns.log memcached-multinode-w-b4ns.log /tmp/benchmark-plots/memcached-multinode.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/memcached/memcached.sh b/benchmark/memcached/memcached.sh new file mode 100755 index 0000000..df40de0 --- /dev/null +++ b/benchmark/memcached/memcached.sh @@ -0,0 +1,135 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + +MEMCACHED_VERSION=1.6.22 +MEMCACHED_IMAGE="memcached:${MEMCACHED_VERSION}" + +MEMTIRE_VERSION=2.0.0 +MEMTIRE_IMAGE="redislabs/memtier_benchmark:${MEMTIRE_VERSION}" + +source ~/.profile + +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) +sudo nerdctl pull --quiet $MEMCACHED_IMAGE +sudo nerdctl pull --quiet $MEMTIRE_IMAGE +nerdctl pull --quiet $MEMCACHED_IMAGE +nerdctl pull --quiet $MEMTIRE_IMAGE + +echo "===== Benchmark: memcached rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client + set -ex + + sudo nerdctl run -d --name memcached-server $MEMCACHED_IMAGE + SERVER_IP=$(sudo nerdctl exec memcached-server hostname -i) + LOG_NAME="memcached-rootful-direct.log" + sudo nerdctl run --name memcached-client $MEMTIRE_IMAGE --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + sudo nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached rootful via host =====" +( + set +e + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client + set -ex + + sudo nerdctl run -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-rootful-host.log" + sudo nerdctl run --name memcached-client $MEMTIRE_IMAGE --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + sudo nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + sudo nerdctl rm -f memcached-server + sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + set -ex + + nerdctl run -d --name memcached-server $MEMCACHED_IMAGE + SERVER_IP=$(nerdctl exec memcached-server hostname -i) + LOG_NAME="memcached-wo-b4ns-direct.log" + nerdctl run -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + 
nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + set -ex + + nerdctl run -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-wo-b4ns-host.log" + nerdctl run -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + LOG_NAME="memcached-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$HOST_IP --port=11212 --protocol=memcache_binary --json-out-file=/$LOG_NAME > /dev/null + nerdctl cp memcached-client:/$LOG_NAME ./$LOG_NAME + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop 
etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-server -p 11212:11211 $MEMCACHED_IMAGE + SERVER_IP=$(nerdctl exec memcached-server hostname -i) + nerdctl run --label nerdctl/bypass4netns=true -d --name memcached-client --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c "sleep infinity" + nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary + + nerdctl rm -f memcached-server + nerdctl rm -f memcached-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/memcached/memcached_multinode.sh b/benchmark/memcached/memcached_multinode.sh new file mode 100755 index 0000000..f9ac038 --- /dev/null +++ b/benchmark/memcached/memcached_multinode.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +cd $(dirname $0) +. 
../../util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f memcached-server +NAME="test" exec_lxc nerdctl rm -f memcached-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +MEMCACHED_VERSION=1.6.22 +MEMCACHED_IMAGE="memcached:${MEMCACHED_VERSION}" +MEMTIRE_VERSION=2.0.0 +MEMTIRE_IMAGE="redislabs/memtier_benchmark:${MEMTIRE_VERSION}" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $MEMCACHED_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MEMCACHED_IMAGE +NAME="test" exec_lxc sudo nerdctl pull --quiet $MEMTIRE_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MEMTIRE_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: memcached rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="memcached-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec memcached-client memtier_benchmark --host=$TEST1_VXLAN_ADDR --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc sudo nerdctl exec memcached-client cat 
/$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f memcached-server + NAME="test2" exec_lxc sudo nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh memcached-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="memcached-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec memcached-client memtier_benchmark --host=$TEST1_VXLAN_ADDR --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc nerdctl exec memcached-client cat /$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f memcached-server + NAME="test2" exec_lxc nerdctl rm -f memcached-client +) + +echo "===== Benchmark: memcached client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc 
/bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 11212:11211 --name memcached-server -d $MEMCACHED_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name memcached-client -d --entrypoint '' $MEMTIRE_IMAGE /bin/sh -c 'sleep infinity'" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec memcached-server hostname -i) + sleep 5 + LOG_NAME="memcached-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec memcached-client memtier_benchmark --host=$SERVER_IP --port=11211 --protocol=memcache_binary --json-out-file=/$LOG_NAME + NAME="test2" exec_lxc nerdctl exec memcached-client cat /$LOG_NAME > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f memcached-server + NAME="test2" exec_lxc nerdctl rm -f memcached-client +) \ No newline at end of file diff --git a/benchmark/memcached/memcached_plot.py b/benchmark/memcached/memcached_plot.py new file mode 100644 index 0000000..e90ac7e --- /dev/null +++ b/benchmark/memcached/memcached_plot.py @@ -0,0 +1,55 @@ +import matplotlib.pyplot as plt +import numpy as np +import json +import sys + + +BAR_WIDTH=0.4 + +def load_data(filename): + data = {} + with open(filename) as f: + d = json.load(f) + if d is None: + raise Exception("{} has invalid json format".format(filename)) + data["Ops/sec"] = [] + data["Ops/sec"].append(d["ALL STATS"]["Sets"]["Ops/sec"]) + data["Ops/sec"].append(d["ALL STATS"]["Gets"]["Ops/sec"]) + data["Latency"] = [] + data["Latency"].append(d["ALL STATS"]["Sets"]["Latency"]) + data["Latency"].append(d["ALL STATS"]["Gets"]["Latency"]) + return data + +labels=['Sets(Ops/sec)', 'Gets(Ops/sec)', 'Sets(Latency)', 'Gets(Latency)'] + +plt.ylabel("Ops / seconds") + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Operations / second") +ax2 = 
ax1.twinx() +ax2.set_ylabel("latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor+BAR_WIDTH*i], datas[i]["Ops/sec"], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor*2+BAR_WIDTH*i, factor*3+BAR_WIDTH*i], datas[i]["Latency"], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From b7586253bbcc924941a5e06b2a9afde06461842d Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sun, 3 Dec 2023 16:28:51 +0000 Subject: [PATCH 48/55] add rabbitmq benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 6 +- benchmark/rabbitmq/rabbitmq.sh | 119 +++++++++++++++++++++++ benchmark/rabbitmq/rabbitmq_multinode.sh | 80 +++++++++++++++ benchmark/rabbitmq/rabbitmq_plot.py | 46 +++++++++ 4 files changed, 249 insertions(+), 2 deletions(-) create mode 100755 benchmark/rabbitmq/rabbitmq.sh create mode 100755 benchmark/rabbitmq/rabbitmq_multinode.sh create mode 100644 benchmark/rabbitmq/rabbitmq_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d424eac..2d7261e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -144,7 +144,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: 
["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -194,6 +194,8 @@ jobs: - run: python3 benchmark/block/block_plot.py block-multinode-rootful.log block-multinode-wo-b4ns.log block-multinode-w-b4ns.log /tmp/benchmark-plots/block-multinode.png - run: python3 benchmark/memcached/memcached_plot.py memcached-rootful-direct.log memcached-rootful-host.log memcached-wo-b4ns-direct.log memcached-wo-b4ns-host.log memcached-w-b4ns.log /tmp/benchmark-plots/memcached.png - run: python3 benchmark/memcached/memcached_plot.py memcached-multinode-rootful.log memcached-multinode-wo-b4ns.log memcached-multinode-w-b4ns.log /tmp/benchmark-plots/memcached-multinode.png + - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-rootful-direct.log rabbitmq-rootful-host.log rabbitmq-wo-b4ns-direct.log rabbitmq-wo-b4ns-host.log rabbitmq-w-b4ns.log /tmp/benchmark-plots/rabbitmq.png + - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-multinode-rootful.log rabbitmq-multinode-wo-b4ns.log rabbitmq-multinode-w-b4ns.log /tmp/benchmark-plots/rabbitmq-multinode.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/rabbitmq/rabbitmq.sh b/benchmark/rabbitmq/rabbitmq.sh new file mode 100755 index 0000000..dfe562b --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +RABBITMQ_VERSION=3.12.10 +RABBITMQ_IMAGE="rabbitmq:$RABBITMQ_VERSION" + +PERF_VERSION="2.20.0" +PERF_IMAGE="pivotalrabbitmq/perf-test:$PERF_VERSION" + +source ~/.profile +cd $(dirname $0) + +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) +sudo nerdctl pull --quiet $RABBITMQ_IMAGE +sudo nerdctl pull --quiet $PERF_IMAGE +nerdctl pull --quiet $RABBITMQ_IMAGE +nerdctl pull --quiet $PERF_IMAGE + +echo "===== Benchmark: rabbitmq rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f rabbitmq-server + set -ex + + sudo nerdctl run -d --name rabbitmq-server $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(sudo nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-rootful-direct.log" + sudo nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + sudo nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq rootful via host =====" +( + set +e + sudo nerdctl rm -f rabbitmq-server + set -ex + + sudo nerdctl run -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(sudo nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-rootful-host.log" + sudo nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + sudo nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f rabbitmq-server + set -ex + + nerdctl run -d --name rabbitmq-server $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-wo-b4ns-direct.log" + nerdctl run --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f rabbitmq-server + set -ex + + nerdctl run -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-wo-b4ns-host.log" + nerdctl run --name 
rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + LOG_NAME="rabbitmq-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$HOST_IP:5673 --producers 2 --consumers 2 --time 60 > $LOG_NAME + + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name rabbitmq-server -p 5673:5672 $RABBITMQ_IMAGE + sleep 10 + SERVER_IP=$(nerdctl exec rabbitmq-server hostname -i) + nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client --rm $PERF_IMAGE --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 + + nerdctl rm -f rabbitmq-server + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) \ No newline at end of file diff --git a/benchmark/rabbitmq/rabbitmq_multinode.sh 
b/benchmark/rabbitmq/rabbitmq_multinode.sh new file mode 100755 index 0000000..e0f146d --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq_multinode.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f rabbitmq-server +NAME="test" exec_lxc nerdctl rm -f rabbitmq-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" + +RABBITMQ_VERSION=3.12.10 +RABBITMQ_IMAGE="rabbitmq:$RABBITMQ_VERSION" + +PERF_VERSION="2.20.0" +PERF_IMAGE="pivotalrabbitmq/perf-test:$PERF_VERSION" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $RABBITMQ_IMAGE +NAME="test" exec_lxc sudo nerdctl pull --quiet $PERF_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $RABBITMQ_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $PERF_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: rabbitmq rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="rabbitmq-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec rabbitmq-client java -jar 
/perf_test/perf-test.jar --uri amqp://$TEST1_VXLAN_ADDR --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc sudo nerdctl rm -f rabbitmq-client +) + +echo "===== Benchmark: rabbitmq client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh rabbitmq-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="rabbitmq-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec rabbitmq-client java -jar /perf_test/perf-test.jar --uri amqp://$TEST1_VXLAN_ADDR --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc nerdctl rm -f rabbitmq-client +) + +echo "===== Benchmark: rabbitmq client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash 
-c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 5673:5672 --name rabbitmq-server -d $RABBITMQ_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name rabbitmq-client -d --entrypoint '' $PERF_IMAGE /bin/sh -c 'sleep infinity'" + sleep 5 + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec rabbitmq-server hostname -i) + LOG_NAME="rabbitmq-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec rabbitmq-client java -jar /perf_test/perf-test.jar --uri amqp://$SERVER_IP --producers 2 --consumers 2 --time 60 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f rabbitmq-server + NAME="test2" exec_lxc nerdctl rm -f rabbitmq-client +) diff --git a/benchmark/rabbitmq/rabbitmq_plot.py b/benchmark/rabbitmq/rabbitmq_plot.py new file mode 100644 index 0000000..213a1e5 --- /dev/null +++ b/benchmark/rabbitmq/rabbitmq_plot.py @@ -0,0 +1,46 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip() + if "sending rate avg" in line: + data["Sending"] = int(line.split(" ")[5]) + if "receiving rate avg" in line: + data["Receiving"] = int(line.split(" ")[5]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['Sending', 'Receiving'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("messages / second") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor + BAR_WIDTH*i], [datas[i][labels[0]], datas[i][labels[1]]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) 
+plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From b297fe6a874c499a5b216151101f53340ca0362f Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 5 Dec 2023 09:22:50 +0000 Subject: [PATCH 49/55] add etcd benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 6 +- benchmark/etcd/Dockerfile | 14 +++ benchmark/etcd/etcd.sh | 150 +++++++++++++++++++++++++++++++ benchmark/etcd/etcd_multinode.sh | 90 +++++++++++++++++++ benchmark/etcd/etcd_plot.py | 53 +++++++++++ 5 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 benchmark/etcd/Dockerfile create mode 100755 benchmark/etcd/etcd.sh create mode 100755 benchmark/etcd/etcd_multinode.sh create mode 100644 benchmark/etcd/etcd_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2d7261e..42e8235 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -144,7 +144,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -196,6 +196,8 @@ jobs: - run: python3 benchmark/memcached/memcached_plot.py memcached-multinode-rootful.log memcached-multinode-wo-b4ns.log memcached-multinode-w-b4ns.log 
/tmp/benchmark-plots/memcached-multinode.png - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-rootful-direct.log rabbitmq-rootful-host.log rabbitmq-wo-b4ns-direct.log rabbitmq-wo-b4ns-host.log rabbitmq-w-b4ns.log /tmp/benchmark-plots/rabbitmq.png - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-multinode-rootful.log rabbitmq-multinode-wo-b4ns.log rabbitmq-multinode-w-b4ns.log /tmp/benchmark-plots/rabbitmq-multinode.png + - run: python3 benchmark/etcd/etcd_plot.py etcd-rootful-direct.log etcd-rootful-host.log etcd-wo-b4ns-direct.log etcd-wo-b4ns-host.log etcd-w-b4ns.log /tmp/benchmark-plots/etcd.png + - run: python3 benchmark/etcd/etcd_plot.py etcd-multinode-rootful.log etcd-multinode-wo-b4ns.log etcd-multinode-w-b4ns.log /tmp/benchmark-plots/etcd-multinode.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/etcd/Dockerfile b/benchmark/etcd/Dockerfile new file mode 100644 index 0000000..d2a21e4 --- /dev/null +++ b/benchmark/etcd/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.21.3 as bench-builder + +RUN mkdir /etcd +RUN curl -fsSL https://github.com/etcd-io/etcd/archive/refs/tags/v3.5.10.tar.gz | tar -xz --no-same-owner --no-same-permissions --strip-components 1 -C /etcd +RUN ls -l /etcd + +# static link +RUN cd /etcd/tools/benchmark && CGO_ENABLED=0 go build -o /bench main.go + +FROM ubuntu:22.04 + +COPY --from=bench-builder /bench /bench + +CMD ["/bin/bash", "-c", "sleep infinity"] diff --git a/benchmark/etcd/etcd.sh b/benchmark/etcd/etcd.sh new file mode 100755 index 0000000..d932e38 --- /dev/null +++ b/benchmark/etcd/etcd.sh @@ -0,0 +1,150 @@ +#!/bin/bash +set -eu -o pipefail + +cd $(dirname $0) + + +ETCD_VERSION="v3.3.25" +ETCD_IMAGE="quay.io/coreos/etcd:${ETCD_VERSION}" +BENCH_IMAGE="etcd-bench" + +source ~/.profile + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit 
+sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit +sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . +nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . + +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) +sudo nerdctl pull --quiet $ETCD_IMAGE +nerdctl pull --quiet $ETCD_IMAGE + +echo "===== Benchmark: etcd rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed + set -ex + + sudo nerdctl run -d --name etcd-server $ETCD_IMAGE /bin/sh -c "sleep infinity" + SERVER_IP=$(sudo nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server sudo nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + sleep 5 + LOG_NAME="etcd-rootful-direct.log" + sudo nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd rootful via host =====" +( + set +e + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client + set -ex + + sudo nerdctl run -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-rootful-host.log" + sudo nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + sudo nerdctl rm -f etcd-server + sudo nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl 
rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed + set -ex + + nerdctl run -d --name etcd-server $ETCD_IMAGE /bin/sh -c "sleep infinity" + SERVER_IP=$(nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + sleep 5 + LOG_NAME="etcd-wo-b4ns-direct.log" + nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop etcd-server + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + set -ex + + nerdctl run -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-wo-b4ns-host.log" + nerdctl run --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed + set -ex + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$HOST_IP:2379 + sleep 5 + LOG_NAME="etcd-w-b4ns.log" + nerdctl run --label nerdctl/bypass4netns=true --rm $BENCH_IMAGE /bench put --key-size=8 
--val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $HOST_IP:12379 > $LOG_NAME + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user reset-failed +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d --name etcd-server -p 12379:2379 $ETCD_IMAGE /bin/sh -c "sleep infinity" + sleep 5 + SERVER_IP=$(nerdctl exec etcd-server hostname -i) + systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + nerdctl run --label nerdctl/bypass4netns=true --rm $BENCH_IMAGE /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 + + nerdctl rm -f etcd-server + nerdctl rm -f etcd-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/etcd/etcd_multinode.sh b/benchmark/etcd/etcd_multinode.sh new file mode 100755 index 0000000..1eb3d98 --- /dev/null +++ b/benchmark/etcd/etcd_multinode.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +cd $(dirname $0) +. 
../../util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f etcd-server +NAME="test" exec_lxc nerdctl rm -f etcd-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +ETCD_VERSION="v3.3.25" +ETCD_IMAGE="quay.io/coreos/etcd:${ETCD_VERSION}" +BENCH_IMAGE="etcd-bench" + +set -eux -o pipefail + +NAME="test" exec_lxc sudo nerdctl pull --quiet $ETCD_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $ETCD_IMAGE +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/etcd && sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/etcd && nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." 
+ +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: etcd rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc systemd-run --user --unit etcd-server sudo nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$TEST1_VXLAN_ADDR:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="etcd-multinode-rootful.log" + NAME="test2" exec_lxc sudo nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $TEST1_VXLAN_ADDR:2379 > $LOG_NAME + + NAME="test" exec_lxc sudo nerdctl rm -f etcd-server + NAME="test" exec_lxc systemctl --user stop etcd-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc sudo nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR 
$TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test" exec_lxc systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$TEST1_VXLAN_ADDR:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh etcd-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 5 + LOG_NAME="etcd-multinode-wo-b4ns.log" + NAME="test2" exec_lxc nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $TEST1_VXLAN_ADDR:2379 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f etcd-server + NAME="test" exec_lxc systemctl --user stop etcd-server + NAME="test" exec_lxc systemctl --user reset-failed + NAME="test2" exec_lxc nerdctl rm -f etcd-client +) + +echo "===== Benchmark: etcd client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -p 12379:2379 --name etcd-server -d $ETCD_IMAGE /bin/sh -c 'sleep infinity'" + SERVER_IP=$(NAME="test" exec_lxc nerdctl exec etcd-server hostname -i) + NAME="test" exec_lxc systemd-run --user --unit etcd-server nerdctl exec etcd-server /usr/local/bin/etcd 
--listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://$SERVER_IP:2379 + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true --name etcd-client -d $BENCH_IMAGE /bin/sh -c 'sleep infinity'" + sleep 5 + LOG_NAME="etcd-multinode-w-b4ns.log" + NAME="test2" exec_lxc nerdctl exec etcd-client /bench put --key-size=8 --val-size=256 --conns=10 --clients=10 --total=100000 --endpoints $SERVER_IP:2379 > $LOG_NAME + + NAME="test" exec_lxc nerdctl rm -f etcd-server + NAME="test2" exec_lxc nerdctl rm -f etcd-client +) \ No newline at end of file diff --git a/benchmark/etcd/etcd_plot.py b/benchmark/etcd/etcd_plot.py new file mode 100644 index 0000000..2e8a399 --- /dev/null +++ b/benchmark/etcd/etcd_plot.py @@ -0,0 +1,53 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = line.strip().replace("\t", " ") + if "Requests/sec" in line: + data["Requests/sec"] = float(line.split(" ")[1]) + if "Average" in line: + data["Latency(ms)"] = float(line.split(" ")[1]) * 1000 + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['Requests/sec', 'Latency(ms)'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Requests / second") +ax2 = ax1.twinx() +ax2.set_ylabel("Average latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i], datas[i][labels[0]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor+BAR_WIDTH*i], datas[i][labels[1]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = 
ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From ac9e09fe58278d332474b6aeb1d140c33d1ba802 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Tue, 5 Dec 2023 17:25:52 +0000 Subject: [PATCH 50/55] add mysql benchmark Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 6 +- benchmark/mysql/Dockerfile | 7 ++ benchmark/mysql/mysql.sh | 146 +++++++++++++++++++++++++++++ benchmark/mysql/mysql_multinode.sh | 87 +++++++++++++++++ benchmark/mysql/mysql_plot.py | 55 +++++++++++ 5 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 benchmark/mysql/Dockerfile create mode 100755 benchmark/mysql/mysql.sh create mode 100755 benchmark/mysql/mysql_multinode.sh create mode 100644 benchmark/mysql/mysql_plot.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 42e8235..2a21607 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd"] + script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -144,7 +144,7 @@ jobs: timeout-minutes: 20 strategy: matrix: - script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd"] + script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] steps: - uses: actions/checkout@v4.1.1 - name: setup lxd (v5.19) @@ -198,6 +198,8 @@ 
jobs: - run: python3 benchmark/rabbitmq/rabbitmq_plot.py rabbitmq-multinode-rootful.log rabbitmq-multinode-wo-b4ns.log rabbitmq-multinode-w-b4ns.log /tmp/benchmark-plots/rabbitmq-multinode.png - run: python3 benchmark/etcd/etcd_plot.py etcd-rootful-direct.log etcd-rootful-host.log etcd-wo-b4ns-direct.log etcd-wo-b4ns-host.log etcd-w-b4ns.log /tmp/benchmark-plots/etcd.png - run: python3 benchmark/etcd/etcd_plot.py etcd-multinode-rootful.log etcd-multinode-wo-b4ns.log etcd-multinode-w-b4ns.log /tmp/benchmark-plots/etcd-multinode.png + - run: python3 benchmark/mysql/mysql_plot.py mysql-rootful-direct.log mysql-rootful-host.log mysql-wo-b4ns-direct.log mysql-wo-b4ns-host.log mysql-w-b4ns.log /tmp/benchmark-plots/mysql.png + - run: python3 benchmark/mysql/mysql_plot.py mysql-multinode-rootful.log mysql-multinode-wo-b4ns.log mysql-multinode-w-b4ns.log /tmp/benchmark-plots/mysql-multinode.png - uses: actions/upload-artifact@v3 with: name: benchmark-plots diff --git a/benchmark/mysql/Dockerfile b/benchmark/mysql/Dockerfile new file mode 100644 index 0000000..4089b96 --- /dev/null +++ b/benchmark/mysql/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:22.04 + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y sysbench + +CMD ["/bin/bash", "-c", "sleep infinity"] + diff --git a/benchmark/mysql/mysql.sh b/benchmark/mysql/mysql.sh new file mode 100755 index 0000000..00adbea --- /dev/null +++ b/benchmark/mysql/mysql.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +set -eu -o pipefail + +MYSQL_VERSION=8.2.0 +MYSQL_IMAGE="mysql:$MYSQL_VERSION" +BENCH_IMAGE="mysql-bench" + +source ~/.profile +cd $(dirname $0) +. ../../util.sh + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +systemctl --user restart containerd +sleep 1 +systemctl --user restart buildkit +sleep 3 +systemctl --user status --no-pager containerd +systemctl --user status --no-pager buildkit +sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . 
+nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . + +sudo nerdctl pull --quiet $MYSQL_IMAGE +nerdctl pull --quiet $MYSQL_IMAGE +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) + +echo "===== Benchmark: mysql rootful via NetNS =====" +( + set +e + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client + set -ex + + sudo nerdctl run -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + sudo nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(sudo nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-rootful-direct.log + + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql rootful via host =====" +( + set +e + sudo nerdctl rm -f mysql-server + sudo nerdctl rm -f mysql-client + set -ex + + sudo nerdctl run -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + sudo nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-rootful-host.log + + sudo nerdctl rm -f mysql-server + sudo nerdctl 
rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) via intermediate NetNS =====" +( + set +e + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + set -ex + + nerdctl run -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-wo-b4ns-direct.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) via host =====" +( + set +e + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + set -ex + + nerdctl run -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-wo-b4ns-host.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) via host =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f mysql-server + nerdctl rm -f 
mysql-client + systemctl --user reset-failed + set -ex + + systemd-run --user --unit run-bypass4netnsd bypass4netnsd + + nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$HOST_IP --mysql-port=13306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-w-b4ns.log + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop run-bypass4netnsd +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + set +e + systemctl --user stop run-bypass4netnsd + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop etcd.service + systemctl --user reset-failed + set -ex + + systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$HOST_IP:2379 --advertise-client-urls http://$HOST_IP:2379 + systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$HOST_IP:2379 --multinode-host-address=$HOST_IP + + nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE + nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity + SERVER_IP=$(nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-port=3306 --mysql-db=bench --mysql-user=root 
--mysql-password=pass --db-driver=mysql oltp_common prepare + nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-port=3306 --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run + + nerdctl rm -f mysql-server + nerdctl rm -f mysql-client + systemctl --user stop run-bypass4netnsd + systemctl --user stop etcd.service + systemctl --user reset-failed +) diff --git a/benchmark/mysql/mysql_multinode.sh b/benchmark/mysql/mysql_multinode.sh new file mode 100755 index 0000000..ca20dc0 --- /dev/null +++ b/benchmark/mysql/mysql_multinode.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +cd $(dirname $0) +. ../../util.sh + +set +e +NAME="test" exec_lxc sudo nerdctl rm -f mysql-server +NAME="test" exec_lxc nerdctl rm -f mysql-server +sudo lxc rm -f test2 + +TEST1_VXLAN_MAC="02:42:c0:a8:00:1" +TEST1_VXLAN_ADDR="192.168.2.1" +TEST2_VXLAN_MAC="02:42:c0:a8:00:2" +TEST2_VXLAN_ADDR="192.168.2.2" +MYSQL_VERSION=8.2.0 +MYSQL_IMAGE="mysql:$MYSQL_VERSION" +BENCH_IMAGE="mysql-bench" + +set -eux -o pipefail + +# sometimes fail to pull images +# this is workaround +# https://github.com/containerd/nerdctl/issues/622 +NAME="test" exec_lxc systemctl --user restart containerd +sleep 1 +NAME="test" exec_lxc systemctl --user restart buildkit +sleep 3 +NAME="test" exec_lxc systemctl --user status --no-pager containerd +NAME="test" exec_lxc systemctl --user status --no-pager buildkit +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/mysql && sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." +NAME="test" exec_lxc /bin/bash -c "cd /home/ubuntu/bypass4netns/benchmark/mysql && nerdctl build -f ./Dockerfile -t $BENCH_IMAGE ." 
+ +NAME="test" exec_lxc sudo nerdctl pull --quiet $MYSQL_IMAGE +NAME="test" exec_lxc nerdctl pull --quiet $MYSQL_IMAGE + +sudo lxc stop test +sudo lxc copy test test2 +sudo lxc start test +sudo lxc start test2 +sleep 5 + +TEST_ADDR=$(sudo lxc exec test -- hostname -I | sed 's/ //') +TEST2_ADDR=$(sudo lxc exec test2 -- hostname -I | sed 's/ //') + +echo "===== Benchmark: mysql rootful with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && sudo nerdctl run -p 4789:4789/udp --privileged -d --name mysql-client $BENCH_IMAGE sleep infinity" + NAME="test2" exec_lxc sudo /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 30 + NAME="test2" exec_lxc sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc sudo nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-rootful.log + + NAME="test" exec_lxc sudo nerdctl rm -f mysql-server + NAME="test2" exec_lxc sudo nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/o bypass4netns) server(w/o bypass4netns) with multinode via VXLAN =====" +( + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test" exec_lxc 
/home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-server $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR $TEST2_ADDR $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run -p 4789:4789/udp --privileged -d --name mysql-client $BENCH_IMAGE sleep infinity" + NAME="test2" exec_lxc /home/ubuntu/bypass4netns/test/setup_vxlan.sh mysql-client $TEST2_VXLAN_MAC $TEST2_VXLAN_ADDR $TEST_ADDR $TEST1_VXLAN_MAC $TEST1_VXLAN_ADDR + sleep 30 + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$TEST1_VXLAN_ADDR --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-wo-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f mysql-server + NAME="test2" exec_lxc nerdctl rm -f mysql-client +) + +echo "===== Benchmark: mysql client(w/ bypass4netns) server(w/ bypass4netns) with multinode =====" +( + NAME="test" exec_lxc systemd-run --user --unit etcd.service /usr/bin/etcd --listen-client-urls http://$TEST_ADDR:2379 --advertise-client-urls http://$TEST_ADDR:2379 + NAME="test" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST_ADDR + NAME="test2" exec_lxc systemd-run --user --unit run-bypass4netnsd bypass4netnsd --multinode=true --multinode-etcd-address=http://$TEST_ADDR:2379 --multinode-host-address=$TEST2_ADDR + NAME="test" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d -p 13306:3306 --name mysql-server -e MYSQL_ROOT_PASSWORD=pass -e MYSQL_DATABASE=bench $MYSQL_IMAGE" + NAME="test2" exec_lxc /bin/bash -c "sleep 3 && nerdctl run --label nerdctl/bypass4netns=true -d --name mysql-client $BENCH_IMAGE sleep infinity" + 
SERVER_IP=$(NAME="test" exec_lxc nerdctl inspect mysql-server | jq -r .[0].NetworkSettings.Networks.'"unknown-eth0"'.IPAddress) + sleep 30 + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_common prepare + NAME="test2" exec_lxc nerdctl exec mysql-client sysbench --threads=4 --time=60 --mysql-host=$SERVER_IP --mysql-db=bench --mysql-user=root --mysql-password=pass --db-driver=mysql oltp_read_write run > mysql-multinode-w-b4ns.log + + NAME="test" exec_lxc nerdctl rm -f mysql-server + NAME="test2" exec_lxc nerdctl rm -f mysql-client +) \ No newline at end of file diff --git a/benchmark/mysql/mysql_plot.py b/benchmark/mysql/mysql_plot.py new file mode 100644 index 0000000..c7aad7e --- /dev/null +++ b/benchmark/mysql/mysql_plot.py @@ -0,0 +1,55 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys +import re + + +def load_data(filename): + data = {} + with open(filename) as f: + line = f.readline() + while line: + line = re.sub('\s+', ' ', line.strip()).split(" ") + if "transactions:" in line: + data["transactions"] = float(line[2].replace("(", "")) + if "queries:" in line: + data["queries"] = float(line[2].replace("(", "")) + if "avg:" in line: + data["latency(ms)"] = float(line[1]) + line = f.readline() + return data + +BAR_WIDTH=0.25 + +labels=['transactions', 'queries', 'latency(ms)'] + +data_num = len(sys.argv)-2 +factor = (data_num+1) * BAR_WIDTH + +datas = [] +for i in range(0, data_num): + filename = sys.argv[1+i] + data = load_data(filename) + datas.append(data) + +fig = plt.figure() +ax1 = fig.add_subplot() +ax1.set_ylabel("Operations / second") +ax2 = ax1.twinx() +ax2.set_ylabel("Average latency (ms)") + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax1.bar([BAR_WIDTH*i, factor+BAR_WIDTH*i], [datas[i][labels[0]], datas[i][labels[1]]], align="edge", edgecolor="black", linewidth=1, 
width=BAR_WIDTH, label=filename) + +for i in range(0, data_num): + filename = sys.argv[1+i] + ax2.bar([factor*2+BAR_WIDTH*i], datas[i][labels[2]], align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) + +h1, l1 = ax1.get_legend_handles_labels() +ax1.legend(h1, l1, loc="upper left") +plt.xlim(0, (len(labels)-1)*factor+BAR_WIDTH*data_num) +plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) + +plt.savefig(sys.argv[1+data_num]) From 89622fa3c23eb089ea174b696f66c41e307e9c74 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Fri, 8 Dec 2023 06:31:16 +0000 Subject: [PATCH 51/55] add run_bench.sh Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 9 +++++++++ benchmark/block/block.sh | 2 +- benchmark/etcd/etcd.sh | 2 +- benchmark/iperf3/iperf3.sh | 2 +- benchmark/iperf3/iperf3_host.sh | 4 +++- benchmark/memcached/memcached.sh | 2 +- benchmark/mysql/mysql.sh | 2 +- benchmark/param.bash | 2 ++ benchmark/postgres/postgres.sh | 2 +- benchmark/rabbitmq/rabbitmq.sh | 2 +- benchmark/redis/redis.sh | 2 +- benchmark/run_bench.sh | 14 ++++++++++++++ test/init_test.sh | 17 +++++++++++++++-- 13 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 benchmark/param.bash create mode 100755 benchmark/run_bench.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2a21607..2acbcd5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -204,3 +204,12 @@ jobs: with: name: benchmark-plots path: /tmp/benchmark-plots + + bench-script: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4.1.1 + - run: sudo ip a add 192.168.6.2/32 dev eth0 + - run: hostname -I + - run: ./test/init_test.sh + - run: ~/bypass4netns/benchmark/run_bench.sh \ No newline at end of file diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh index d12042b..66bb967 100755 --- a/benchmark/block/block.sh +++ b/benchmark/block/block.sh @@ -7,6 +7,7 @@ IMAGE_NAME="block" 
COUNT="10" source ~/.profile +. ../param.bash ./gen_blocks.sh @@ -24,7 +25,6 @@ sudo nerdctl build -f ./Dockerfile -t $IMAGE_NAME . nerdctl build -f ./Dockerfile -t $IMAGE_NAME . BLOCK_SIZES=('1k' '32k' '128k' '512k' '1m' '32m' '128m' '512m' '1g') -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: block rooful via NetNS =====" ( diff --git a/benchmark/etcd/etcd.sh b/benchmark/etcd/etcd.sh index d932e38..4a43fe6 100755 --- a/benchmark/etcd/etcd.sh +++ b/benchmark/etcd/etcd.sh @@ -9,6 +9,7 @@ ETCD_IMAGE="quay.io/coreos/etcd:${ETCD_VERSION}" BENCH_IMAGE="etcd-bench" source ~/.profile +. ../param.bash # sometimes fail to pull images # this is workaround @@ -22,7 +23,6 @@ systemctl --user status --no-pager buildkit sudo nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) sudo nerdctl pull --quiet $ETCD_IMAGE nerdctl pull --quiet $ETCD_IMAGE diff --git a/benchmark/iperf3/iperf3.sh b/benchmark/iperf3/iperf3.sh index ae5a9f5..5a7394f 100755 --- a/benchmark/iperf3/iperf3.sh +++ b/benchmark/iperf3/iperf3.sh @@ -7,10 +7,10 @@ cd $(dirname $0) ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" source ~/.profile +. ../param.bash sudo nerdctl pull --quiet $ALPINE_IMAGE nerdctl pull --quiet $ALPINE_IMAGE -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: iperf3 rootful via NetNS =====" ( diff --git a/benchmark/iperf3/iperf3_host.sh b/benchmark/iperf3/iperf3_host.sh index 464f1b2..ab2950b 100755 --- a/benchmark/iperf3/iperf3_host.sh +++ b/benchmark/iperf3/iperf3_host.sh @@ -2,12 +2,14 @@ set -eu -o pipefail +cd $(dirname $0) + source ~/.profile +. 
../param.bash ALPINE_IMAGE="public.ecr.aws/docker/library/alpine:3.16" nerdctl pull --quiet "${ALPINE_IMAGE}" -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) systemd-run --user --unit run-iperf3 iperf3 -s echo "===== Benchmark: netns -> host With bypass4netns =====" diff --git a/benchmark/memcached/memcached.sh b/benchmark/memcached/memcached.sh index df40de0..e984ff7 100755 --- a/benchmark/memcached/memcached.sh +++ b/benchmark/memcached/memcached.sh @@ -10,8 +10,8 @@ MEMTIRE_VERSION=2.0.0 MEMTIRE_IMAGE="redislabs/memtier_benchmark:${MEMTIRE_VERSION}" source ~/.profile +. ../param.bash -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) sudo nerdctl pull --quiet $MEMCACHED_IMAGE sudo nerdctl pull --quiet $MEMTIRE_IMAGE nerdctl pull --quiet $MEMCACHED_IMAGE diff --git a/benchmark/mysql/mysql.sh b/benchmark/mysql/mysql.sh index 00adbea..527825c 100755 --- a/benchmark/mysql/mysql.sh +++ b/benchmark/mysql/mysql.sh @@ -9,6 +9,7 @@ BENCH_IMAGE="mysql-bench" source ~/.profile cd $(dirname $0) . ../../util.sh +. ../param.bash # sometimes fail to pull images # this is workaround @@ -24,7 +25,6 @@ nerdctl build -f ./Dockerfile -t $BENCH_IMAGE . sudo nerdctl pull --quiet $MYSQL_IMAGE nerdctl pull --quiet $MYSQL_IMAGE -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: mysql rootful via NetNS =====" ( diff --git a/benchmark/param.bash b/benchmark/param.bash new file mode 100644 index 0000000..9f66510 --- /dev/null +++ b/benchmark/param.bash @@ -0,0 +1,2 @@ +HOST_IP_PREFIX="192.168.6." +HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q $HOST_IP_PREFIX; if [ $? 
-eq 0 ]; then echo $i; fi; done) diff --git a/benchmark/postgres/postgres.sh b/benchmark/postgres/postgres.sh index 4d3b79f..be87571 100755 --- a/benchmark/postgres/postgres.sh +++ b/benchmark/postgres/postgres.sh @@ -8,10 +8,10 @@ POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" source ~/.profile cd $(dirname $0) . ../../util.sh +. ../param.bash sudo nerdctl pull --quiet $POSTGRES_IMAGE nerdctl pull --quiet $POSTGRES_IMAGE -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) echo "===== Benchmark: postgresql rootful via NetNS =====" ( diff --git a/benchmark/rabbitmq/rabbitmq.sh b/benchmark/rabbitmq/rabbitmq.sh index dfe562b..7b3a243 100755 --- a/benchmark/rabbitmq/rabbitmq.sh +++ b/benchmark/rabbitmq/rabbitmq.sh @@ -8,8 +8,8 @@ PERF_IMAGE="pivotalrabbitmq/perf-test:$PERF_VERSION" source ~/.profile cd $(dirname $0) +. ../param.bash -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) sudo nerdctl pull --quiet $RABBITMQ_IMAGE sudo nerdctl pull --quiet $PERF_IMAGE nerdctl pull --quiet $RABBITMQ_IMAGE diff --git a/benchmark/redis/redis.sh b/benchmark/redis/redis.sh index a71a3b0..36f73e7 100755 --- a/benchmark/redis/redis.sh +++ b/benchmark/redis/redis.sh @@ -7,8 +7,8 @@ REDIS_VERSION=7.2.3 REDIS_IMAGE="redis:${REDIS_VERSION}" source ~/.profile +. ../param.bash -HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? 
-eq 0 ]; then echo $i; fi; done) sudo nerdctl pull --quiet $REDIS_IMAGE nerdctl pull --quiet $REDIS_IMAGE diff --git a/benchmark/run_bench.sh b/benchmark/run_bench.sh new file mode 100755 index 0000000..9ed00cd --- /dev/null +++ b/benchmark/run_bench.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -e + +cd $(dirname $0) + +BENCHMARKS=(iperf3 block redis memcached etcd rabbitmq mysql postgres) + +for BENCH in ${BENCHMARKS[@]}; do + pushd $BENCH + ./${BENCH}.sh + python3 ${BENCH}_plot.py $BENCH-rootful-direct.log $BENCH-rootful-host.log $BENCH-wo-b4ns-direct.log $BENCH-wo-b4ns-host.log $BENCH-w-b4ns.log ../$BENCH.png + popd +done \ No newline at end of file diff --git a/test/init_test.sh b/test/init_test.sh index 87dddcd..ed4e952 100755 --- a/test/init_test.sh +++ b/test/init_test.sh @@ -1,8 +1,11 @@ #!/bin/bash +TEST_USER=ubuntu +if [ -v GITHUB_WORKSPACE ]; then + TEST_USER=runner +fi set -eu -o pipefail -TEST_USER=ubuntu if [ "$(whoami)" != "$TEST_USER" ]; then su $TEST_USER -c $0 @@ -16,7 +19,17 @@ echo "===== Prepare =====" ( set -x - sudo cp -r /host ~/bypass4netns + # for lxc + if [ -d /host ]; then + sudo cp -r /host ~/bypass4netns + fi + + # for github actions runner + if [ $TEST_USER == "runner" ]; then + cd ../ + cp -r bypass4netns ~/bypass4netns + fi + sudo chown -R $TEST_USER:$TEST_USER ~/bypass4netns sudo apt-get update From d06e9bc472d4e9b05fc121010db24e03707c1ac9 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Sat, 9 Dec 2023 15:16:13 +0000 Subject: [PATCH 52/55] improved benchmarks Signed-off-by: Naoki MATSUMOTO --- benchmark/block/bench.go | 29 +++++++++++++++-------------- benchmark/block/block.sh | 23 +++++++++++++++++------ benchmark/block/block_plot.py | 20 ++++++++++---------- test/run_test.sh | 3 +++ 4 files changed, 45 insertions(+), 30 deletions(-) diff --git a/benchmark/block/bench.go b/benchmark/block/bench.go index 15bbb1f..3a2aa15 100644 --- a/benchmark/block/bench.go +++ b/benchmark/block/bench.go @@ -1,7 +1,6 @@ package main import ( - 
"bytes" "encoding/json" "flag" "fmt" @@ -31,9 +30,6 @@ func main() { //fmt.Printf("thread-num = %d\n", *threadNum) //fmt.Printf("count = %d\n", *count) - // disable connection pool - http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = -1 - resultsChan := make(chan BenchmarkResult, *count) for i := 0; i < *threadNum; i++ { @@ -55,6 +51,8 @@ func main() { } func bench(url string, count int, resultChan chan BenchmarkResult) { + bufferSize := 1024 * 1024 * 128 // 128 MiB + buffer := make([]byte, bufferSize) result := BenchmarkResult{ Url: url, Count: count, @@ -81,17 +79,20 @@ func bench(url string, count int, resultChan chan BenchmarkResult) { fmt.Printf("unexpected status code %d", resp.StatusCode) panic("error") } else { - var buffer bytes.Buffer - - writtenSize, err := io.Copy(&buffer, resp.Body) - if err != nil { - fmt.Printf("failed Copy() err=%q", err) - panic("error") + for { + readSize, err := resp.Body.Read(buffer) + if err != nil && err != io.EOF { + fmt.Printf("failed Copy() err=%q", err) + panic("error") + } + if readSize == 0 { + end := time.Now() + elapsed := end.Sub(start).Seconds() + result.TotalElapsedSecond += elapsed + break + } + result.TotalSize += int64(readSize) } - end := time.Now() - elapsed := end.Sub(start).Seconds() - result.TotalSize += writtenSize - result.TotalElapsedSecond += elapsed } resp.Body.Close() break diff --git a/benchmark/block/block.sh b/benchmark/block/block.sh index 66bb967..b161adb 100755 --- a/benchmark/block/block.sh +++ b/benchmark/block/block.sh @@ -5,6 +5,7 @@ cd $(dirname $0) IMAGE_NAME="block" COUNT="10" +THREAD_NUM="1" source ~/.profile . 
../param.bash @@ -35,12 +36,14 @@ echo "===== Benchmark: block rooful via NetNS =====" sudo nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 SERVER_IP=$(sudo nerdctl exec block-server hostname -i) LOG_NAME="block-rootful-direct.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + sudo nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + sudo nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME done sudo nerdctl rm -f block-server @@ -56,11 +59,13 @@ echo "===== Benchmark: block rootful via host =====" sudo nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" sudo nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 LOG_NAME="block-rootful-host.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - sudo nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + sudo nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + sudo nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME done sudo nerdctl rm -f block-server @@ -76,12 +81,14 @@ echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name block-server -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 SERVER_IP=$(nerdctl exec block-server hostname -i) LOG_NAME="block-wo-b4ns-direct.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench 
-count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE >> $LOG_NAME done nerdctl rm -f block-server @@ -97,11 +104,13 @@ echo "===== Benchmark: block client(w/o bypass4netns) server(w/o bypass4netns) v nerdctl run -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" nerdctl run -d --name block-client $IMAGE_NAME sleep infinity + sleep 5 LOG_NAME="block-wo-b4ns-host.log" rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME done nerdctl rm -f block-server @@ -122,10 +131,12 @@ echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) via nerdctl run --label nerdctl/bypass4netns=true -d --name block-server -p 8080:80 -v $(pwd):/var/www/html:ro $IMAGE_NAME nginx -g "daemon off;" nerdctl run --label nerdctl/bypass4netns=true -d --name block-client $IMAGE_NAME sleep infinity LOG_NAME="block-w-b4ns.log" + sleep 5 rm -f $LOG_NAME for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME + nerdctl exec block-client /bench -count 1 -thread-num 1 -url http://$HOST_IP:8080/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$HOST_IP:8080/blk-$BLOCK_SIZE >> $LOG_NAME done nerdctl rm -f block-server @@ -151,7 +162,7 @@ echo "===== Benchmark: block client(w/ bypass4netns) server(w/ bypass4netns) wit 
SERVER_IP=$(nerdctl exec block-server hostname -i) for BLOCK_SIZE in ${BLOCK_SIZES[@]} do - nerdctl exec block-client /bench -count $COUNT -thread-num 1 -url http://$SERVER_IP/blk-$BLOCK_SIZE + nerdctl exec block-client /bench -count $COUNT -thread-num $THREAD_NUM -url http://$SERVER_IP/blk-$BLOCK_SIZE done nerdctl rm -f block-server diff --git a/benchmark/block/block_plot.py b/benchmark/block/block_plot.py index d303b1a..a8598cf 100644 --- a/benchmark/block/block_plot.py +++ b/benchmark/block/block_plot.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -import numpy as np +import numpy as np import json import sys @@ -11,11 +11,12 @@ def load_data(filename): with open(filename) as f: line = f.readline() while line: - # only single thread - l_json = json.loads(line)[0] - l_json["th_gbps"] = l_json["totalSize"] * 8 / l_json["totalElapsedSecond"] / 1024 / 1024 / 1024 - file = l_json["url"].split("/")[3] - data[file] = l_json + for l in json.loads(line): + gbps = l["totalSize"] * 8 / l["totalElapsedSecond"] / 1024 / 1024 / 1024 + file = l["url"].split("/")[3] + if file not in data: + data[file] = 0.0 + data[file] += gbps line = f.readline() return data @@ -23,14 +24,14 @@ def load_data(filename): plt.ylabel("Gbps") -data_num = len(sys.argv)-2 +data_num = len(sys.argv)-2 factor = (data_num+1) * BAR_WIDTH for i in range(0, data_num): filename = sys.argv[1+i] - data = load_data(filename) + data = load_data(filename) value = [] for l in labels: - value.append(data[l]["th_gbps"]) + value.append(data[l]) plt.bar([x*factor+(BAR_WIDTH*i) for x in range(0, len(labels))], value, align="edge", edgecolor="black", linewidth=1, width=BAR_WIDTH, label=filename) plt.legend() @@ -38,4 +39,3 @@ def load_data(filename): plt.xticks([x*factor+BAR_WIDTH*data_num/2 for x in range(0, len(labels))], labels) plt.savefig(sys.argv[1+data_num]) - diff --git a/test/run_test.sh b/test/run_test.sh index c85a739..179a1fb 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -40,6 +40,9 @@ 
systemd-run --user --unit run-iperf3 iperf3 -s HOST_IP=$(HOST=$(hostname -I); for i in ${HOST[@]}; do echo $i | grep -q "192.168.6."; if [ $? -eq 0 ]; then echo $i; fi; done) ~/bypass4netns/test/seccomp.json.sh | tee /tmp/seccomp.json +sudo journalctl --rotate +sudo journalctl --vacuum-time=1s + echo "===== rootful mode ====" ( set +e From 4203229f92293a1daeeac6e52389cbe1b26006cd Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 10 Jan 2024 16:04:19 +0900 Subject: [PATCH 53/55] ci: change hooked branches Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2acbcd5..1d9517a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,7 +4,7 @@ on: push: branches: - master - - ng-b4ns + - main - release/** pull_request: null workflow_dispatch: @@ -212,4 +212,4 @@ jobs: - run: sudo ip a add 192.168.6.2/32 dev eth0 - run: hostname -I - run: ./test/init_test.sh - - run: ~/bypass4netns/benchmark/run_bench.sh \ No newline at end of file + - run: ~/bypass4netns/benchmark/run_bench.sh From 4aef232cb3a36afade26e38c84df03e9bba44fa4 Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Wed, 10 Jan 2024 16:20:06 +0900 Subject: [PATCH 54/55] re-enable test Signed-off-by: Naoki MATSUMOTO --- test/run_test.sh | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/test/run_test.sh b/test/run_test.sh index 179a1fb..304af4d 100755 --- a/test/run_test.sh +++ b/test/run_test.sh @@ -15,10 +15,10 @@ if [ "$1" == "SYNC" ]; then sudo cp -r /host ~/bypass4netns sudo chown -R ubuntu:ubuntu ~/bypass4netns cd ~/bypass4netns + echo "source code is updated" exec $0 "FORK" exit 0 fi -echo "source code is updated" cd ~/bypass4netns rm -f bypass4netns bypass4netnsd make @@ -104,29 +104,14 @@ echo "===== '--ignore' option test =====" systemctl --user stop run-bypass4netns.service ) -# 
nerdctl image build not working. -#[+] Building 10.1s (2/2) FINISHED -# => [internal] load build definition from Dockerfile 0.0s -# => => transferring dockerfile: 274B 0.0s -# => ERROR [internal] load metadata for public.ecr.aws/docker/library/alpine:3.16 10.0s -#------ -# > [internal] load metadata for public.ecr.aws/docker/library/alpine:3.16: -#------ -#Dockerfile:1 -#-------------------- -# 1 | >>> FROM public.ecr.aws/docker/library/alpine:3.16 -# 2 | -# 3 | RUN apk add python3 -#-------------------- -#error: failed to solve: public.ecr.aws/docker/library/alpine:3.16: failed to do request: Head "https://public.ecr.aws/v2/docker/library/alpine/manifests/3.16": dial tcp: lookup public.ecr.aws on 10.0.2.3:53: read udp 10.0.2.100:47105->10.0.2.3:53: i/o timeout -#echo "===== connect(2),sendto(2) test =====" -#( -# systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 -# set -x -# cd $SCRIPT_DIR/test -# /bin/bash test_syscalls.sh /tmp/seccomp.json $(cat /tmp/host_ip) -# systemctl --user stop run-bypass4netns.service -#) +echo "===== connect(2) test =====" +( + systemd-run --user --unit run-bypass4netns bypass4netns --ignore "127.0.0.0/8,10.0.0.0/8" -p 8080:5201 + set -x + cd $SCRIPT_DIR + /bin/bash test_syscalls.sh /tmp/seccomp.json $HOST_IP + systemctl --user stop run-bypass4netns.service +) echo "===== Test bypass4netnsd =====" ( From 4b874fee803256d87c99a4765fd297743f3f5d5f Mon Sep 17 00:00:00 2001 From: Naoki MATSUMOTO Date: Thu, 11 Jan 2024 14:45:29 +0900 Subject: [PATCH 55/55] cleanup Signed-off-by: Naoki MATSUMOTO --- .github/workflows/test.yaml | 42 +++++++++---------- benchmark/block/block_multinode.sh | 2 +- benchmark/etcd/etcd_multinode.sh | 2 +- benchmark/iperf3/iperf3_multinode.sh | 2 +- benchmark/memcached/memcached_multinode.sh | 2 +- benchmark/mysql/mysql.sh | 2 +- benchmark/mysql/mysql_multinode.sh | 2 +- benchmark/postgres/postgres.sh | 2 +- benchmark/postgres/postgres_multinode.sh | 2 +- 
benchmark/rabbitmq/rabbitmq_multinode.sh | 2 +- benchmark/redis/redis_multinode.sh | 2 +- debug.sh | 39 ----------------- enter.sh => test/enter.sh | 0 .../export_lxc_image.sh | 0 launch_test_lxc.sh => test/launch_test_lxc.sh | 2 +- test/multinode.sh | 2 +- setup_lxd.sh => test/setup_lxd.sh | 4 +- test/setup_vxlan.sh | 2 +- util.sh => test/util.sh | 0 19 files changed, 36 insertions(+), 75 deletions(-) delete mode 100755 debug.sh rename enter.sh => test/enter.sh (100%) rename export_lxc_image.sh => test/export_lxc_image.sh (100%) rename launch_test_lxc.sh => test/launch_test_lxc.sh (87%) rename setup_lxd.sh => test/setup_lxd.sh (77%) rename util.sh => test/util.sh (100%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1d9517a..6b5bff1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,29 +41,29 @@ jobs: path: /tmp/test-image.tar.zst lookup-only: true - - name: setup lxd (v5.19) + - name: setup lxd id: s1 if: steps.cache-restore.outputs.cache-hit != 'true' - run: ./setup_lxd.sh + run: ./test/setup_lxd.sh - name: launch lxc container - id: s6 + id: s2 if: steps.s1.conclusion == 'success' - run: ./launch_test_lxc.sh + run: ./test/launch_test_lxc.sh - name: install dependencies and build - id: s7 - if: steps.s6.conclusion == 'success' + id: s3 + if: steps.s2.conclusion == 'success' run: sudo lxc exec test -- sudo --login --user ubuntu /host/test/init_test.sh - name: export image - id: s8 - if: steps.s7.conclusion == 'success' - run: ./export_lxc_image.sh test + id: s4 + if: steps.s3.conclusion == 'success' + run: ./test/export_lxc_image.sh test - uses: actions/cache/save@v3 - id: s11 - if: steps.s8.conclusion == 'success' + id: s5 + if: steps.s4.conclusion == 'success' with: key: lxc-image-base-${{ hashFiles('go.sum', 'test/init_test.sh') }} path: /tmp/test-image.tar.zst @@ -74,8 +74,8 @@ jobs: timeout-minutes: 20 steps: - uses: actions/checkout@v4.1.1 - - name: setup lxd (v5.19) - run: ./setup_lxd.sh + - 
name: setup lxd + run: ./test/setup_lxd.sh - uses: actions/cache/restore@v3 id: cache-restore with: @@ -85,12 +85,12 @@ jobs: - name: load lxc image run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container - run: ./launch_test_lxc.sh test-export + run: ./test/launch_test_lxc.sh test-export - name: run test run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/test/run_test.sh SYNC" # some source codes may be updated. re-export new image. - name: export image - run: sudo lxc image alias delete test-export && rm -f /tmp/test-image.tar.zst && ./export_lxc_image.sh test + run: sudo lxc image alias delete test-export && rm -f /tmp/test-image.tar.zst && ./test/export_lxc_image.sh test - uses: actions/cache/save@v3 with: key: lxc-image-${{ github.sha }} @@ -107,8 +107,8 @@ jobs: script: ["iperf3/iperf3_host", "iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] steps: - uses: actions/checkout@v4.1.1 - - name: setup lxd (v5.19) - run: ./setup_lxd.sh + - name: setup lxd + run: ./test/setup_lxd.sh - uses: actions/cache/restore@v3 id: cache-restore with: @@ -118,7 +118,7 @@ jobs: - name: load lxc image run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container - run: ./launch_test_lxc.sh test-export + run: ./test/launch_test_lxc.sh test-export - name: run benchmark (${{ matrix.script }}) run: sudo lxc exec test -- sudo --login --user ubuntu /bin/bash -c "sleep 3 && /home/ubuntu/bypass4netns/benchmark/${{ matrix.script }}.sh" - name: upload plot @@ -147,8 +147,8 @@ jobs: script: ["iperf3/iperf3", "postgres/postgres", "redis/redis", "block/block", "memcached/memcached", "rabbitmq/rabbitmq", "etcd/etcd", "mysql/mysql"] steps: - uses: actions/checkout@v4.1.1 - - name: setup lxd (v5.19) - run: ./setup_lxd.sh + - name: setup lxd + run: ./test/setup_lxd.sh - uses: 
actions/cache/restore@v3 id: cache-restore with: @@ -158,7 +158,7 @@ jobs: - name: load lxc image run: sudo lxc image import /tmp/test-image.tar.zst --alias test-export - name: launch lxc container - run: ./launch_test_lxc.sh test-export + run: ./test/launch_test_lxc.sh test-export - name: run benchmark (${{ matrix.script }}) run: ./benchmark/${{ matrix.script }}_multinode.sh - name: upload plot diff --git a/benchmark/block/block_multinode.sh b/benchmark/block/block_multinode.sh index be7fe9b..b223c77 100755 --- a/benchmark/block/block_multinode.sh +++ b/benchmark/block/block_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f block-server diff --git a/benchmark/etcd/etcd_multinode.sh b/benchmark/etcd/etcd_multinode.sh index 1eb3d98..3370c0c 100755 --- a/benchmark/etcd/etcd_multinode.sh +++ b/benchmark/etcd/etcd_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f etcd-server diff --git a/benchmark/iperf3/iperf3_multinode.sh b/benchmark/iperf3/iperf3_multinode.sh index d8c5272..00a7f4d 100755 --- a/benchmark/iperf3/iperf3_multinode.sh +++ b/benchmark/iperf3/iperf3_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f iperf3-server diff --git a/benchmark/memcached/memcached_multinode.sh b/benchmark/memcached/memcached_multinode.sh index f9ac038..9f2b1ee 100755 --- a/benchmark/memcached/memcached_multinode.sh +++ b/benchmark/memcached/memcached_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. 
../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f memcached-server diff --git a/benchmark/mysql/mysql.sh b/benchmark/mysql/mysql.sh index 527825c..f805714 100755 --- a/benchmark/mysql/mysql.sh +++ b/benchmark/mysql/mysql.sh @@ -8,7 +8,7 @@ BENCH_IMAGE="mysql-bench" source ~/.profile cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh . ../param.bash # sometimes fail to pull images diff --git a/benchmark/mysql/mysql_multinode.sh b/benchmark/mysql/mysql_multinode.sh index ca20dc0..b3428ff 100755 --- a/benchmark/mysql/mysql_multinode.sh +++ b/benchmark/mysql/mysql_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f mysql-server diff --git a/benchmark/postgres/postgres.sh b/benchmark/postgres/postgres.sh index be87571..6e87e81 100755 --- a/benchmark/postgres/postgres.sh +++ b/benchmark/postgres/postgres.sh @@ -7,7 +7,7 @@ POSTGRES_IMAGE="postgres:$POSTGRES_VERSION" source ~/.profile cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh . ../param.bash sudo nerdctl pull --quiet $POSTGRES_IMAGE diff --git a/benchmark/postgres/postgres_multinode.sh b/benchmark/postgres/postgres_multinode.sh index 503af07..d821d0f 100755 --- a/benchmark/postgres/postgres_multinode.sh +++ b/benchmark/postgres/postgres_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f psql-server diff --git a/benchmark/rabbitmq/rabbitmq_multinode.sh b/benchmark/rabbitmq/rabbitmq_multinode.sh index e0f146d..a1463a0 100755 --- a/benchmark/rabbitmq/rabbitmq_multinode.sh +++ b/benchmark/rabbitmq/rabbitmq_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. 
../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f rabbitmq-server diff --git a/benchmark/redis/redis_multinode.sh b/benchmark/redis/redis_multinode.sh index 06ecf71..21dd291 100755 --- a/benchmark/redis/redis_multinode.sh +++ b/benchmark/redis/redis_multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../../util.sh +. ../../test/util.sh set +e NAME="test" exec_lxc sudo nerdctl rm -f redis-server diff --git a/debug.sh b/debug.sh deleted file mode 100755 index a10887b..0000000 --- a/debug.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -sudo lxc rm -f test -sudo lxc rm -f test2 - -set -eux -o pipefail - -TEST1_VXLAN_MAC="02:42:c0:a8:00:1" -TEST1_VXLAN_ADDR="192.168.2.1" -TEST2_VXLAN_MAC="02:42:c0:a8:00:2" -TEST2_VXLAN_ADDR="192.168.2.2" - -cd $(dirname $0) -. ./util.sh - -sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test -sudo lxc launch -c security.nesting=true images:ubuntu/22.04 test2 - -sleep 5 - -TEST_ADDR=$(NAME="test" exec_lxc hostname -I) -TEST2_ADDR=$(NAME="test2" exec_lxc hostname -I) - -NAME="test" exec_lxc sudo apt install -y ethtool -NAME="test" exec_lxc sudo ip link add vxlan0 type vxlan id 100 noproxy nolearning remote $TEST2_ADDR dstport 4789 dev eth0 -NAME="test" exec_lxc sudo ethtool -K vxlan0 tx-checksum-ip-generic off -NAME="test" exec_lxc sudo ip a add $TEST1_VXLAN_ADDR/24 dev vxlan0 -NAME="test" exec_lxc sudo ip link set vxlan0 up - -NAME="test2" exec_lxc sudo apt install -y ethtool -NAME="test2" exec_lxc sudo ip link add vxlan0 type vxlan id 100 noproxy nolearning remote $TEST_ADDR dstport 4789 dev eth0 -NAME="test2" exec_lxc sudo ethtool -K vxlan0 tx-checksum-ip-generic off -NAME="test2" exec_lxc sudo ip a add $TEST2_VXLAN_ADDR/24 dev vxlan0 -NAME="test2" exec_lxc sudo ip link set vxlan0 up - -NAME="test" exec_lxc ping -c 5 $TEST2_VXLAN_ADDR - -sudo lxc rm -f test -sudo lxc rm -f test2 \ No newline at end of file diff --git a/enter.sh b/test/enter.sh similarity index 100% rename from enter.sh rename to 
test/enter.sh diff --git a/export_lxc_image.sh b/test/export_lxc_image.sh similarity index 100% rename from export_lxc_image.sh rename to test/export_lxc_image.sh diff --git a/launch_test_lxc.sh b/test/launch_test_lxc.sh similarity index 87% rename from launch_test_lxc.sh rename to test/launch_test_lxc.sh index 92d6903..6710634 100755 --- a/launch_test_lxc.sh +++ b/test/launch_test_lxc.sh @@ -7,7 +7,7 @@ cd $(dirname $0) # lxd init --auto --storage-backend=btrfs sudo lxc launch -c security.privileged=true -c security.nesting=true $IMAGE test -sudo lxc config device add test share disk source=$(pwd) path=/host +sudo lxc config device add test share disk source=$(cd ../; pwd) path=/host sudo lxc exec test -- /bin/bash -c "echo 'ubuntu ALL=NOPASSWD: ALL' | EDITOR='tee -a' visudo" # let user services running # this sometimes fails, retry until success diff --git a/test/multinode.sh b/test/multinode.sh index d762b3d..dd95eb0 100755 --- a/test/multinode.sh +++ b/test/multinode.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. ../util.sh +. ./util.sh set +e NAME="test" exec_lxc nerdctl rm -f vxlan diff --git a/setup_lxd.sh b/test/setup_lxd.sh similarity index 77% rename from setup_lxd.sh rename to test/setup_lxd.sh index 6c77143..f619fa1 100755 --- a/setup_lxd.sh +++ b/test/setup_lxd.sh @@ -4,10 +4,10 @@ set -eux -o pipefail cd $(dirname $0) -#sudo snap remove --purge lxd && sudo snap install lxd --revision=26093 sudo modprobe vxlan -cat test/lxd.yaml | sudo lxd init --preseed +cat lxd.yaml | sudo lxd init --preseed sudo sysctl -w net.ipv4.ip_forward=1 + #https://andreas.scherbaum.la/post/2023-01-18_fix-lxc-network-issues-in-ubuntu-22.04/ sudo iptables -I DOCKER-USER -i lxdbr0 -o eth0 -j ACCEPT sudo iptables -I DOCKER-USER -o lxdbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT diff --git a/test/setup_vxlan.sh b/test/setup_vxlan.sh index 0d144e6..fd6b0ca 100755 --- a/test/setup_vxlan.sh +++ b/test/setup_vxlan.sh @@ -1,7 +1,7 @@ #!/bin/bash cd $(dirname $0) -. 
../util.sh +. ./util.sh set -eux -o pipefail diff --git a/util.sh b/test/util.sh similarity index 100% rename from util.sh rename to test/util.sh