Skip to content

Commit

Permalink
Support specifying GCP account credentials as a config option. (#4855)
Browse files Browse the repository at this point in the history
[SC-44515](https://app.shortcut.com/tiledb-inc/story/44515/support-specifying-gcp-account-credentials-as-a-config-option)

---
TYPE: CONFIG
DESC: Add `vfs.gcs.service_account_credential` config option that
specifies a Google Cloud service account credential JSON string.

---
TYPE: CONFIG
DESC: Add `vfs.gcs.external_account_credential` config option that
specifies a Google Cloud Workload Identity Federation credential JSON
string.
  • Loading branch information
teo-tsirpanis committed Apr 11, 2024
1 parent 96f33c5 commit f72c428
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 3 deletions.
6 changes: 6 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["vfs.read_logging_mode"] = "";
all_param_values["vfs.gcs.endpoint"] = "";
all_param_values["vfs.gcs.project_id"] = "";
all_param_values["vfs.gcs.service_account_key"] = "";
all_param_values["vfs.gcs.workload_identity_configuration"] = "";
all_param_values["vfs.gcs.impersonate_service_account"] = "";
all_param_values["vfs.gcs.max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down Expand Up @@ -761,6 +763,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
vfs_param_values["read_logging_mode"] = "";
vfs_param_values["gcs.endpoint"] = "";
vfs_param_values["gcs.project_id"] = "";
vfs_param_values["gcs.service_account_key"] = "";
vfs_param_values["gcs.workload_identity_configuration"] = "";
vfs_param_values["gcs.impersonate_service_account"] = "";
vfs_param_values["gcs.max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down Expand Up @@ -825,6 +829,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
std::map<std::string, std::string> gcs_param_values;
gcs_param_values["endpoint"] = "";
gcs_param_values["project_id"] = "";
gcs_param_values["service_account_key"] = "";
gcs_param_values["workload_identity_configuration"] = "";
gcs_param_values["impersonate_service_account"] = "";
gcs_param_values["max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-cppapi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") {
names.push_back(it->first);
}
// Check number of VFS params in default config object.
CHECK(names.size() == 66);
CHECK(names.size() == 68);
}

TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") {
Expand Down
94 changes: 93 additions & 1 deletion test/src/unit-vfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@ TEST_CASE("Validate vfs.s3.custom_headers.*", "[s3][custom-headers]") {

#ifdef HAVE_GCS
TEST_CASE(
"Validate GCS service account impersonation", "[gcs][impersonation]") {
"Validate GCS service account impersonation",
"[gcs][credentials][impersonation]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
Expand Down Expand Up @@ -802,4 +803,95 @@ TEST_CASE(
target_service_account);
REQUIRE(impersonate_credentials->delegates() == delegates);
}

TEST_CASE(
"Validate GCS service account credentials",
"[gcs][credentials][service-account]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string service_account_key = "{\"foo\": \"bar\"}";

require_tiledb_ok(
cfg.set("vfs.gcs.service_account_key", service_account_key));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto service_account =
dynamic_cast<google::cloud::internal::ServiceAccountConfig*>(
credentials.get());

REQUIRE(service_account != nullptr);
REQUIRE(service_account->json_object() == service_account_key);
}

TEST_CASE(
"Validate GCS service account credentials with impersonation",
"[gcs][credentials][service-account-and-impersonation]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string service_account_key = "{\"foo\": \"bar\"}";
std::string impersonate_service_account = "account1,account2,account3";

require_tiledb_ok(
cfg.set("vfs.gcs.service_account_key", service_account_key));
require_tiledb_ok(cfg.set(
"vfs.gcs.impersonate_service_account", impersonate_service_account));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto impersonate_credentials =
dynamic_cast<google::cloud::internal::ImpersonateServiceAccountConfig*>(
credentials.get());
REQUIRE(impersonate_credentials != nullptr);
REQUIRE(impersonate_credentials->target_service_account() == "account3");
REQUIRE(
impersonate_credentials->delegates() ==
std::vector<std::string>{"account1", "account2"});

auto inner_service_account =
dynamic_cast<google::cloud::internal::ServiceAccountConfig*>(
impersonate_credentials->base_credentials().get());

REQUIRE(inner_service_account != nullptr);
REQUIRE(inner_service_account->json_object() == service_account_key);
}

TEST_CASE(
"Validate GCS external account credentials",
"[gcs][credentials][external-account]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string workload_identity_configuration = "{\"foo\": \"bar\"}";

require_tiledb_ok(cfg.set(
"vfs.gcs.workload_identity_configuration",
workload_identity_configuration));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto external_account =
dynamic_cast<google::cloud::internal::ExternalAccountConfig*>(
credentials.get());

REQUIRE(external_account != nullptr);
REQUIRE(external_account->json_object() == workload_identity_configuration);
}
#endif
11 changes: 11 additions & 0 deletions tiledb/api/c_api/config/config_api_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,17 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT;
* - `vfs.gcs.project_id` <br>
* Set the GCS project id. <br>
* **Default**: ""
* - `vfs.gcs.service_account_key` <br>
* Set the JSON string with GCS service account key. Takes precedence
* over `vfs.gcs.workload_identity_configuration` if both are specified. If
* neither is specified, Application Default Credentials will be used. <br>
* **Default**: ""
* - `vfs.gcs.workload_identity_configuration` <br>
* Set the JSON string with Workload Identity Federation configuration.
* `vfs.gcs.service_account_key` takes precedence over this if both are
* specified. If neither is specified, Application Default Credentials will
* be used. <br>
* **Default**: ""
* - `vfs.gcs.impersonate_service_account` <br>
* Set the GCS service account to impersonate. A chain of impersonated
* accounts can be formed by specifying many service accounts, separated by a
Expand Down
9 changes: 9 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ const std::string Config::VFS_AZURE_RETRY_DELAY_MS = "800";
const std::string Config::VFS_AZURE_MAX_RETRY_DELAY_MS = "60000";
const std::string Config::VFS_GCS_ENDPOINT = "";
const std::string Config::VFS_GCS_PROJECT_ID = "";
const std::string Config::VFS_GCS_SERVICE_ACCOUNT_KEY = "";
const std::string Config::VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION = "";
const std::string Config::VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT = "";
const std::string Config::VFS_GCS_MAX_PARALLEL_OPS =
Config::SM_IO_CONCURRENCY_LEVEL;
Expand Down Expand Up @@ -421,6 +423,11 @@ const std::map<std::string, std::string> default_config_values = {
"vfs.azure.max_retry_delay_ms", Config::VFS_AZURE_MAX_RETRY_DELAY_MS),
std::make_pair("vfs.gcs.endpoint", Config::VFS_GCS_ENDPOINT),
std::make_pair("vfs.gcs.project_id", Config::VFS_GCS_PROJECT_ID),
std::make_pair(
"vfs.gcs.service_account_key", Config::VFS_GCS_SERVICE_ACCOUNT_KEY),
std::make_pair(
"vfs.gcs.workload_identity_configuration",
Config::VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION),
std::make_pair(
"vfs.gcs.impersonate_service_account",
Config::VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT),
Expand Down Expand Up @@ -513,6 +520,8 @@ const std::set<std::string> Config::unserialized_params_ = {
"vfs.s3.aws_external_id",
"vfs.s3.aws_load_frequency",
"vfs.s3.aws_session_name",
"vfs.gcs.service_account_key",
"vfs.gcs.workload_identity_configuration",
"vfs.gcs.impersonate_service_account",
"rest.username",
"rest.password",
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,12 @@ class Config {
/** GCS service account(s) to impersonate. */
static const std::string VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT;

/** GCS service account key JSON string. */
static const std::string VFS_GCS_SERVICE_ACCOUNT_KEY;

/** GCS external account credentials JSON string. */
static const std::string VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION;

/** GCS max parallel ops. */
static const std::string VFS_GCS_MAX_PARALLEL_OPS;

Expand Down
11 changes: 11 additions & 0 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,17 @@ class Config {
* - `vfs.gcs.project_id` <br>
* Set the GCS project id. <br>
* **Default**: ""
* - `vfs.gcs.service_account_key` <br>
* Set the JSON string with GCS service account key. Takes precedence
* over `vfs.gcs.workload_identity_configuration` if both are specified. If
* neither is specified, Application Default Credentials will be used. <br>
* **Default**: ""
* - `vfs.gcs.workload_identity_configuration` <br>
* Set the JSON string with Workload Identity Federation configuration.
* `vfs.gcs.service_account_key` takes precedence over this if both are
* specified. If neither is specified, Application Default Credentials will
* be used. <br>
* **Default**: ""
* - `vfs.gcs.impersonate_service_account` <br>
* Set the GCS service account to impersonate. A chain of impersonated
* accounts can be formed by specifying many service accounts, separated by
Expand Down
18 changes: 17 additions & 1 deletion tiledb/sm/filesystem/gcs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ Status GCS::init(const Config& config, ThreadPool* const thread_pool) {
}
project_id_ = config.get("vfs.gcs.project_id", &found);
assert(found);
service_account_key_ = config.get("vfs.gcs.service_account_key", &found);
assert(found);
workload_identity_configuration_ =
config.get("vfs.gcs.workload_identity_configuration", &found);
assert(found);
impersonate_service_account_ =
config.get("vfs.gcs.impersonate_service_account", &found);
assert(found);
Expand Down Expand Up @@ -187,7 +192,18 @@ static shared_ptr<google::cloud::Credentials> apply_impersonation(
std::shared_ptr<google::cloud::Credentials> GCS::make_credentials(
const google::cloud::Options& options) const {
shared_ptr<google::cloud::Credentials> creds = nullptr;
if (!endpoint_.empty() || getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) {
if (!service_account_key_.empty()) {
if (!workload_identity_configuration_.empty()) {
LOG_WARN(
"Both GCS service account key and workload identity configuration "
"were specified; picking the former");
}
creds = google::cloud::MakeServiceAccountCredentials(
service_account_key_, options);
} else if (!workload_identity_configuration_.empty()) {
creds = google::cloud::MakeExternalAccountCredentials(
workload_identity_configuration_, options);
} else if (!endpoint_.empty() || getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) {
creds = google::cloud::MakeInsecureCredentials();
} else {
creds = google::cloud::MakeGoogleDefaultCredentials(options);
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/filesystem/gcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,12 @@ class GCS {
// The GCS project id.
std::string project_id_;

// The GCS service account credentials JSON string.
std::string service_account_key_;

// The GCS external account credentials JSON string.
std::string workload_identity_configuration_;

// A comma-separated list with the GCS service accounts to impersonate.
std::string impersonate_service_account_;

Expand Down

0 comments on commit f72c428

Please sign in to comment.