From 2eccfe4337646816334e8440eb72f32d457f11c0 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 8 Nov 2024 15:45:39 -0500 Subject: [PATCH] vault: catch expired lease as fatal error When a Vault lease expires, it's revoked on the server and cannot be renewed, so this error should be treated as fatal. The errors we get aren't wrapped by the Vault SDK, so unfortunately we have to read the error messages and can't easily enumerate non-fatal error messages (which might be bubbling up from the stdlib). I've audited the errors currently used and have documented their source. Ref https://github.com/hashicorp/vault/blob/52ba156d47da170bf40471fe57d72522030bdc7e/vault/expiration.go#L1327 Fixes: https://github.com/hashicorp/nomad/issues/23859 --- .changelog/24409.txt | 3 +++ client/vaultclient/vaultclient.go | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 .changelog/24409.txt diff --git a/.changelog/24409.txt b/.changelog/24409.txt new file mode 100644 index 00000000000..ab4570d5e27 --- /dev/null +++ b/.changelog/24409.txt @@ -0,0 +1,3 @@ +```release-note:bug +vault: Fixed a bug where expired secret leases were treated as non-fatal and retried +``` diff --git a/client/vaultclient/vaultclient.go b/client/vaultclient/vaultclient.go index 3985bd2cc62..ada93ee1810 100644 --- a/client/vaultclient/vaultclient.go +++ b/client/vaultclient/vaultclient.go @@ -399,6 +399,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { var renewalErr error leaseDuration := req.increment + if req.isToken { // Set the token in the API client to the one that needs renewal c.client.SetToken(req.id) @@ -434,14 +435,24 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { next := time.Now().Add(renewalDuration) fatal := false - if renewalErr != nil && - (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || - strings.Contains(renewalErr.Error(), "invalid lease ID") || - 
strings.Contains(renewalErr.Error(), "lease is not renewable") || - strings.Contains(renewalErr.Error(), "token not found") || - strings.Contains(renewalErr.Error(), "permission denied")) { - fatal = true - } else if renewalErr != nil { + if renewalErr != nil { + // These errors aren't wrapped by the Vault SDK, so we have to read the + // error messages. Unfortunately we can't easily enumerate non-fatal + // errors so we have a large set here. These can be found in + // vault/expiration.go. + // Current as of vault commit 52ba156d47da170bf40471fe57d72522030bdc7e + errMsg := renewalErr.Error() + if strings.Contains(errMsg, "no namespace") || + strings.Contains(errMsg, "cannot renew a token across namespaces") || + strings.Contains(errMsg, "invalid lease ID") || + strings.Contains(errMsg, "lease expired") || + strings.Contains(errMsg, "lease is not renewable") || + strings.Contains(errMsg, "lease not found") || + strings.Contains(errMsg, "permission denied") || + strings.Contains(errMsg, "token not found") { + fatal = true + } + } else { c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration) c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying", "error", renewalErr, "period", next)