From cf18ecdb12f618fd41d4c78ceedcdec19c1aabfc Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Fri, 20 Dec 2024 10:56:43 +0000 Subject: [PATCH] HPCC-33146 Add backoff if vault authentication fails and check other vaults Signed-off-by: Gavin Halliday --- helm/hpcc/templates/_helpers.tpl | 3 + helm/hpcc/values.schema.json | 4 ++ system/jlib/jsecrets.cpp | 101 ++++++++++++++++++++----------- 3 files changed, 72 insertions(+), 36 deletions(-) diff --git a/helm/hpcc/templates/_helpers.tpl b/helm/hpcc/templates/_helpers.tpl index f3cf675df1e..4710f184ccf 100644 --- a/helm/hpcc/templates/_helpers.tpl +++ b/helm/hpcc/templates/_helpers.tpl @@ -686,6 +686,9 @@ vaults: {{- if (hasKey $vault "writeTimeout") }} writeTimeout: {{ $vault.writeTimeout }} {{- end }} + {{- if (hasKey $vault "backoffTimeout") }} + backoffTimeout: {{ $vault.backoffTimeout }} + {{- end }} {{- end -}} {{- end -}} {{- end -}} diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index 75cc142d971..1aad55c1319 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -925,6 +925,10 @@ "writeTimeout": { "description": "optional timeout (in ms) for socket writing to vault", "type": "number" + }, + "backoffTimeout": { + "description": "optional time (in ms) to back-off connecting to a vault that cannot be reached, or fails to authenticate", + "type": "number" } }, "required": [ "name", "url" ], diff --git a/system/jlib/jsecrets.cpp b/system/jlib/jsecrets.cpp index e5015e86d20..da9aed4e3f4 100644 --- a/system/jlib/jsecrets.cpp +++ b/system/jlib/jsecrets.cpp @@ -606,6 +606,8 @@ class CVault bool verify_server = true; unsigned retries = 3; unsigned retryWait = 1000; + timestamp_type backoffTimeoutUs = 5*60*1000000ULL; + std::atomic backoffFailureTs{0}; timeval connectTimeout = {0, 0}; timeval readTimeout = {0, 0}; timeval writeTimeout = {0, 0}; @@ -648,6 +650,9 @@ class CVault retries = (unsigned) vault->getPropInt("@retries", retries); retryWait = (unsigned) vault->getPropInt("@retryWait", retryWait); + if (vault->hasProp("@backoffTimeout")) + backoffTimeoutUs = vault->getPropInt64("@backoffTimeout") * 1000ULL; + setTimevalMS(connectTimeout, (time_t) vault->getPropInt("@connectTimeout")); setTimevalMS(readTimeout, (time_t) vault->getPropInt("@readTimeout")); setTimevalMS(writeTimeout, (time_t) vault->getPropInt("@writeTimeout")); @@ -894,55 +899,79 @@ class CVault } bool requestSecretAtLocation(CVaultKind &rkind, StringBuffer &content, const char *location, const char *secretCacheKey, const char *version, bool permissionDenied) { - checkAuthentication(permissionDenied); - if (isEmptyString(location)) + //If a previous request failed, then do not retry until backoffTimeoutUs has passed. + if (backoffFailureTs) { - OERRLOG("Vault %s cannot get secret at location without a location", name.str()); - return false; - } - - httplib::Client cli(schemeHostPort.str()); - httplib::Headers headers = { - { "X-Vault-Token", clientToken.str() } - }; + if (getTimeStampNowValue() - backoffFailureTs < backoffTimeoutUs) + return false; - unsigned numRetries = 0; - initClient(cli, headers, numRetries); - httplib::Result res = cli.Get(location, headers); - while (!res && numRetries--) - { - OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location); - if (retryWait) - Sleep(retryWait); - res = cli.Get(location, headers); + //Clear the last failure - to avoid the check on the timestamp. + backoffFailureTs = 0; } - if (res) + bool backoff = false; + try { - if (res->status == 200) + checkAuthentication(permissionDenied); + if (isEmptyString(location)) { - rkind = kind; - content.append(res->body.c_str()); - return true; + OERRLOG("Vault %s cannot get secret at location without a location", name.str()); + return false; } - else if (res->status == 403) + + httplib::Client cli(schemeHostPort.str()); + httplib::Headers headers = { + { "X-Vault-Token", clientToken.str() } + }; + + unsigned numRetries = 0; + initClient(cli, headers, numRetries); + httplib::Result res = cli.Get(location, headers); + while (!res && numRetries--) { - //try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded). - if (permissionDenied==false) - return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true); - OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str()); + OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location); + if (retryWait) + Sleep(retryWait); + res = cli.Get(location, headers); } - else if (res->status == 404) + + if (res) { - OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location); + if (res->status == 200) + { + rkind = kind; + content.append(res->body.c_str()); + return true; + } + else if (res->status == 403) + { + //try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded). + if (permissionDenied==false) + return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true); + OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str()); + } + else if (res->status == 404) + { + OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location); + } + else + { + OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str()); + } } else - { - OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str()); - } + OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location); } - else - OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location); + catch (IException * e) + { + backoff = true; + OERRLOG(e); + e->Release(); + } + + //If authentication failed (or another serious error), then record when that failure occurred + if (backoff) + backoffFailureTs = getTimeStampNowValue(); return false; }