From 0217c4fc531a949ee0276e34340ec89b3bb4b951 Mon Sep 17 00:00:00 2001 From: Leandro Motta Barros Date: Tue, 18 Apr 2023 10:09:45 -0300 Subject: [PATCH] Update libnetwork This new version has a patch cherry-picked from here: https://github.com/moby/libnetwork/pull/1805 This patch is meant to prevent libnetwork's internal state from becoming inconsistent after a crash. Signed-off-by: Leandro Motta Barros Change-type: patch --- vendor.conf | 2 +- .../github.com/docker/libnetwork/endpoint.go | 9 +-- .../docker/libnetwork/sandbox_store.go | 57 ++++++++++++------- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/vendor.conf b/vendor.conf index 5816d49145..ebff4f8499 100644 --- a/vendor.conf +++ b/vendor.conf @@ -47,7 +47,7 @@ github.com/grpc-ecosystem/go-grpc-middleware df0f91b29bbbdfc3a686a7a8edbe # libnetwork # When updating, also update LIBNETWORK_COMMIT in hack/dockerfile/install/proxy.installer accordingly -github.com/docker/libnetwork 4d0934611265197ec35c1fcb6233c0e3757aecc3 https://github.com/balena-os/balena-libnetwork +github.com/docker/libnetwork ace5cf58e62c8569b12363b365ee44422cb8c391 https://github.com/balena-os/balena-libnetwork github.com/docker/go-events e31b211e4f1cd09aa76fe4ac244571fab96ae47f github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec diff --git a/vendor/github.com/docker/libnetwork/endpoint.go b/vendor/github.com/docker/libnetwork/endpoint.go index d79bd33648..62376f3f14 100644 --- a/vendor/github.com/docker/libnetwork/endpoint.go +++ b/vendor/github.com/docker/libnetwork/endpoint.go @@ -844,10 +844,6 @@ func (ep *endpoint) Delete(force bool) error { } } - if err = n.getController().deleteFromStore(ep); err != nil { - return err - } - defer func() { if err != nil && !force { ep.dbExists = false @@ -864,6 +860,11 @@ func (ep *endpoint) Delete(force bool) error { return err } + // This has to come after the sandbox and the driver to 
guarantee that can be the source of truth on restart cases + if err = n.getController().deleteFromStore(ep); err != nil { + return err + } + ep.releaseAddress() if err := n.getEpCnt().DecEndpointCnt(); err != nil { diff --git a/vendor/github.com/docker/libnetwork/sandbox_store.go b/vendor/github.com/docker/libnetwork/sandbox_store.go index 1e53815aee..0cd7a7fca5 100644 --- a/vendor/github.com/docker/libnetwork/sandbox_store.go +++ b/vendor/github.com/docker/libnetwork/sandbox_store.go @@ -2,7 +2,6 @@ package libnetwork import ( "encoding/json" - "sync" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/osl" @@ -207,6 +206,40 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) { return } + // Get all the endpoints + // Use the network as the source of truth so that if there was an issue before the sandbox registered the endpoint + // this will be taken anyway + endpointsInSandboxID := map[string][]*endpoint{} + nl, err := c.getNetworksForScope(datastore.LocalScope) + if err != nil { + logrus.Warnf("Could not get list of networks during sandbox cleanup: %v", err) + return + } + + for _, n := range nl { + var epl []*endpoint + epl, err = n.getEndpointsFromStore() + if err != nil { + logrus.Warnf("Could not get list of endpoints in network %s during sandbox cleanup: %v", n.name, err) + continue + } + for _, ep := range epl { + ep, err = n.getEndpointFromStore(ep.id) + if err != nil { + logrus.Warnf("Could not get endpoint in network %s during sandbox cleanup: %v", n.name, err) + continue + } + if ep.sandboxID == "" { + logrus.Warnf("Endpoint %s not associated to any sandbox, deleting it", ep.id) + ep.Delete(true) + continue + } + + // Append the endpoint to the corresponding sandboxID + endpointsInSandboxID[ep.sandboxID] = append(endpointsInSandboxID[ep.sandboxID], ep) + } + } + for _, kvo := range kvol { sbs := kvo.(*sbState) @@ -252,25 +285,11 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) 
{ c.sandboxes[sb.id] = sb c.Unlock() - for _, eps := range sbs.Eps { - n, err := c.getNetworkFromStore(eps.Nid) - var ep *endpoint - if err != nil { - logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err) - n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}, persist: true} - ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID} - } else { - ep, err = n.getEndpointFromStore(eps.Eid) - if err != nil { - logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err) - ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID} - } - } - if _, ok := activeSandboxes[sb.ID()]; ok && err != nil { - logrus.Errorf("failed to restore endpoint %s in %s for container %s due to %v", eps.Eid, eps.Nid, sb.ContainerID(), err) - continue + // Restore all the endpoints that are supposed to be in this sandbox + if eps, ok := endpointsInSandboxID[sb.id]; ok { + for _, ep := range eps { + sb.addEndpoint(ep) } - sb.addEndpoint(ep) } if _, ok := activeSandboxes[sb.ID()]; !ok {