Skip to content

Commit

Permalink
HPCC-33146 Add backoff if vault authentication fails and check other …
Browse files Browse the repository at this point in the history
…vaults

Signed-off-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday committed Dec 20, 2024
1 parent 098bf2c commit cf18ecd
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 36 deletions.
3 changes: 3 additions & 0 deletions helm/hpcc/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,9 @@ vaults:
{{- if (hasKey $vault "writeTimeout") }}
writeTimeout: {{ $vault.writeTimeout }}
{{- end }}
{{- if (hasKey $vault "backoffTimeout") }}
backoffTimeout: {{ $vault.backoffTimeout }}
{{- end }}
{{- end -}}
{{- end -}}
{{- end -}}
Expand Down
4 changes: 4 additions & 0 deletions helm/hpcc/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,10 @@
"writeTimeout": {
"description": "optional timeout (in ms) for socket writing to vault",
"type": "number"
},
"backoffTimeout": {
"description": "optional time (in ms) to back-off connecting to a vault that cannot be reached, or fails to authenticate",
"type": "number"
}
},
"required": [ "name", "url" ],
Expand Down
101 changes: 65 additions & 36 deletions system/jlib/jsecrets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,8 @@ class CVault
bool verify_server = true;
unsigned retries = 3;
unsigned retryWait = 1000;
timestamp_type backoffTimeoutUs = 5*60*1000000ULL;
std::atomic<timestamp_type> backoffFailureTs{0};
timeval connectTimeout = {0, 0};
timeval readTimeout = {0, 0};
timeval writeTimeout = {0, 0};
Expand Down Expand Up @@ -648,6 +650,9 @@ class CVault
retries = (unsigned) vault->getPropInt("@retries", retries);
retryWait = (unsigned) vault->getPropInt("@retryWait", retryWait);

if (vault->hasProp("@backoffTimeout"))
backoffTimeoutUs = vault->getPropInt64("@backoffTimeout") * 1000ULL;

setTimevalMS(connectTimeout, (time_t) vault->getPropInt("@connectTimeout"));
setTimevalMS(readTimeout, (time_t) vault->getPropInt("@readTimeout"));
setTimevalMS(writeTimeout, (time_t) vault->getPropInt("@writeTimeout"));
Expand Down Expand Up @@ -894,55 +899,79 @@ class CVault
}
bool requestSecretAtLocation(CVaultKind &rkind, StringBuffer &content, const char *location, const char *secretCacheKey, const char *version, bool permissionDenied)
{
checkAuthentication(permissionDenied);
if (isEmptyString(location))
//If a previous request failed, then do not retry until backoffTimeoutUs has passed.
if (backoffFailureTs)
{
OERRLOG("Vault %s cannot get secret at location without a location", name.str());
return false;
}

httplib::Client cli(schemeHostPort.str());
httplib::Headers headers = {
{ "X-Vault-Token", clientToken.str() }
};
if (getTimeStampNowValue() - backoffFailureTs < backoffTimeoutUs)
return false;

unsigned numRetries = 0;
initClient(cli, headers, numRetries);
httplib::Result res = cli.Get(location, headers);
while (!res && numRetries--)
{
OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location);
if (retryWait)
Sleep(retryWait);
res = cli.Get(location, headers);
//Clear the last failure - to avoid the check on the timestamp.
backoffFailureTs = 0;
}

if (res)
bool backoff = false;
try
{
if (res->status == 200)
checkAuthentication(permissionDenied);
if (isEmptyString(location))
{
rkind = kind;
content.append(res->body.c_str());
return true;
OERRLOG("Vault %s cannot get secret at location without a location", name.str());
return false;
}
else if (res->status == 403)

httplib::Client cli(schemeHostPort.str());
httplib::Headers headers = {
{ "X-Vault-Token", clientToken.str() }
};

unsigned numRetries = 0;
initClient(cli, headers, numRetries);
httplib::Result res = cli.Get(location, headers);
while (!res && numRetries--)
{
//try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded).
if (permissionDenied==false)
return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true);
OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
OERRLOG("Retrying vault %s get secret, communication error %d location %s", name.str(), res.error(), location);
if (retryWait)
Sleep(retryWait);
res = cli.Get(location, headers);
}
else if (res->status == 404)

if (res)
{
OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location);
if (res->status == 200)
{
rkind = kind;
content.append(res->body.c_str());
return true;
}
else if (res->status == 403)
{
//try again forcing relogin, but only once. Just in case the token was invalidated but hasn't passed expiration time (for example max usage count exceeded).
if (permissionDenied==false)
return requestSecretAtLocation(rkind, content, location, secretCacheKey, version, true);
OERRLOG("Vault %s permission denied accessing secret (check namespace=%s?) %s.%s location %s [%d](%d) - response: %s", name.str(), vaultNamespace.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
}
else if (res->status == 404)
{
OERRLOG("Vault %s secret not found %s.%s location %s", name.str(), secretCacheKey, version ? version : "", location);
}
else
{
OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
}
}
else
{
OERRLOG("Vault %s error accessing secret %s.%s location %s [%d](%d) - response: %s", name.str(), secretCacheKey, version ? version : "", location, res->status, res.error(), res->body.c_str());
}
OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location);
}
else
OERRLOG("Error: Vault %s http error (%d) accessing secret %s.%s location %s", name.str(), res.error(), secretCacheKey, version ? version : "", location);
catch (IException * e)
{
backoff = true;
OERRLOG(e);
e->Release();
}

//If authentication failed (or another serious error), then record when that failure occurred
if (backoff)
backoffFailureTs = getTimeStampNowValue();

return false;
}
Expand Down

0 comments on commit cf18ecd

Please sign in to comment.