diff --git a/endpoint.go b/endpoint.go index d79bd33648..62376f3f14 100644 --- a/endpoint.go +++ b/endpoint.go @@ -844,10 +844,6 @@ func (ep *endpoint) Delete(force bool) error { } } - if err = n.getController().deleteFromStore(ep); err != nil { - return err - } - defer func() { if err != nil && !force { ep.dbExists = false @@ -864,6 +860,11 @@ func (ep *endpoint) Delete(force bool) error { return err } + // This has to come after the sandbox and the driver to guarantee that the store can be the source of truth on restart cases + if err = n.getController().deleteFromStore(ep); err != nil { + return err + } + ep.releaseAddress() if err := n.getEpCnt().DecEndpointCnt(); err != nil { diff --git a/sandbox_store.go b/sandbox_store.go index 1e53815aee..0cd7a7fca5 100644 --- a/sandbox_store.go +++ b/sandbox_store.go @@ -2,7 +2,6 @@ package libnetwork import ( "encoding/json" - "sync" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/osl" @@ -207,6 +206,40 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) { return } + // Get all the endpoints + // Use the network as the source of truth so that if there was an issue before the sandbox registered the endpoint + // this will be taken anyway + endpointsInSandboxID := map[string][]*endpoint{} + nl, err := c.getNetworksForScope(datastore.LocalScope) + if err != nil { + logrus.Warnf("Could not get list of networks during sandbox cleanup: %v", err) + return + } + + for _, n := range nl { + var epl []*endpoint + epl, err = n.getEndpointsFromStore() + if err != nil { + logrus.Warnf("Could not get list of endpoints in network %s during sandbox cleanup: %v", n.name, err) + continue + } + for _, ep := range epl { + ep, err = n.getEndpointFromStore(ep.id) + if err != nil { + logrus.Warnf("Could not get endpoint in network %s during sandbox cleanup: %v", n.name, err) + continue + } + if ep.sandboxID == "" { + logrus.Warnf("Endpoint %s not associated to any sandbox, deleting it", ep.id) +
ep.Delete(true) + continue + } + + // Append the endpoint to the corresponding sandboxID + endpointsInSandboxID[ep.sandboxID] = append(endpointsInSandboxID[ep.sandboxID], ep) + } + } + for _, kvo := range kvol { sbs := kvo.(*sbState) @@ -252,25 +285,11 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) { c.sandboxes[sb.id] = sb c.Unlock() - for _, eps := range sbs.Eps { - n, err := c.getNetworkFromStore(eps.Nid) - var ep *endpoint - if err != nil { - logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err) - n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}, persist: true} - ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID} - } else { - ep, err = n.getEndpointFromStore(eps.Eid) - if err != nil { - logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err) - ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID} - } - } - if _, ok := activeSandboxes[sb.ID()]; ok && err != nil { - logrus.Errorf("failed to restore endpoint %s in %s for container %s due to %v", eps.Eid, eps.Nid, sb.ContainerID(), err) - continue + // Restore all the endpoints that are supposed to be in this sandbox + if eps, ok := endpointsInSandboxID[sb.id]; ok { + for _, ep := range eps { + sb.addEndpoint(ep) } - sb.addEndpoint(ep) } if _, ok := activeSandboxes[sb.ID()]; !ok {