Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support specifying GCP account credentials as a config option. #4855

Merged
merged 7 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["vfs.read_logging_mode"] = "";
all_param_values["vfs.gcs.endpoint"] = "";
all_param_values["vfs.gcs.project_id"] = "";
all_param_values["vfs.gcs.service_account_key"] = "";
all_param_values["vfs.gcs.workload_identity_configuration"] = "";
all_param_values["vfs.gcs.impersonate_service_account"] = "";
all_param_values["vfs.gcs.max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down Expand Up @@ -761,6 +763,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
vfs_param_values["read_logging_mode"] = "";
vfs_param_values["gcs.endpoint"] = "";
vfs_param_values["gcs.project_id"] = "";
vfs_param_values["gcs.service_account_key"] = "";
vfs_param_values["gcs.workload_identity_configuration"] = "";
vfs_param_values["gcs.impersonate_service_account"] = "";
vfs_param_values["gcs.max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down Expand Up @@ -825,6 +829,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
std::map<std::string, std::string> gcs_param_values;
gcs_param_values["endpoint"] = "";
gcs_param_values["project_id"] = "";
gcs_param_values["service_account_key"] = "";
gcs_param_values["workload_identity_configuration"] = "";
gcs_param_values["impersonate_service_account"] = "";
gcs_param_values["max_parallel_ops"] =
std::to_string(std::thread::hardware_concurrency());
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-cppapi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") {
names.push_back(it->first);
}
// Check number of VFS params in default config object.
CHECK(names.size() == 66);
CHECK(names.size() == 68);
}

TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") {
Expand Down
94 changes: 93 additions & 1 deletion test/src/unit-vfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@ TEST_CASE("Validate vfs.s3.custom_headers.*", "[s3][custom-headers]") {

#ifdef HAVE_GCS
TEST_CASE(
"Validate GCS service account impersonation", "[gcs][impersonation]") {
"Validate GCS service account impersonation",
"[gcs][credentials][impersonation]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
Expand Down Expand Up @@ -802,4 +803,95 @@ TEST_CASE(
target_service_account);
REQUIRE(impersonate_credentials->delegates() == delegates);
}

TEST_CASE(
"Validate GCS service account credentials",
"[gcs][credentials][service-account]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string service_account_key = "{\"foo\": \"bar\"}";

require_tiledb_ok(
cfg.set("vfs.gcs.service_account_key", service_account_key));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto service_account =
dynamic_cast<google::cloud::internal::ServiceAccountConfig*>(
credentials.get());

REQUIRE(service_account != nullptr);
REQUIRE(service_account->json_object() == service_account_key);
}

TEST_CASE(
"Validate GCS service account credentials with impersonation",
"[gcs][credentials][service-account-and-impersonation]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string service_account_key = "{\"foo\": \"bar\"}";
std::string impersonate_service_account = "account1,account2,account3";

require_tiledb_ok(
cfg.set("vfs.gcs.service_account_key", service_account_key));
require_tiledb_ok(cfg.set(
"vfs.gcs.impersonate_service_account", impersonate_service_account));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto impersonate_credentials =
dynamic_cast<google::cloud::internal::ImpersonateServiceAccountConfig*>(
credentials.get());
REQUIRE(impersonate_credentials != nullptr);
REQUIRE(impersonate_credentials->target_service_account() == "account3");
REQUIRE(
impersonate_credentials->delegates() ==
std::vector<std::string>{"account1", "account2"});

auto inner_service_account =
dynamic_cast<google::cloud::internal::ServiceAccountConfig*>(
impersonate_credentials->base_credentials().get());

REQUIRE(inner_service_account != nullptr);
REQUIRE(inner_service_account->json_object() == service_account_key);
}

TEST_CASE(
"Validate GCS external account credentials",
"[gcs][credentials][external-account]") {
ThreadPool thread_pool(2);
Config cfg = set_config_params(true);
GCS gcs;
// The content of the credentials does not matter; it does not get parsed
// until it is used in an API request, which we are not doing.
std::string workload_identity_configuration = "{\"foo\": \"bar\"}";

require_tiledb_ok(cfg.set(
"vfs.gcs.workload_identity_configuration",
workload_identity_configuration));

require_tiledb_ok(gcs.init(cfg, &thread_pool));

auto credentials = gcs.make_credentials({});

// We are using an internal class only for inspection purposes.
auto external_account =
dynamic_cast<google::cloud::internal::ExternalAccountConfig*>(
credentials.get());

REQUIRE(external_account != nullptr);
REQUIRE(external_account->json_object() == workload_identity_configuration);
}
#endif
11 changes: 11 additions & 0 deletions tiledb/api/c_api/config/config_api_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,17 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT;
* - `vfs.gcs.project_id` <br>
* Set the GCS project id. <br>
* **Default**: ""
* - `vfs.gcs.service_account_key` <br>
* Set the JSON string with GCS service account key. Takes precedence
* over `vfs.gcs.workload_identity_configuration` if both are specified. If
* neither is specified, Application Default Credentials will be used. <br>
* **Default**: ""
* - `vfs.gcs.workload_identity_configuration` <br>
* Set the JSON string with Workload Identity Federation credentials.
* `vfs.gcs.service_account_key` takes precedence over this if both are
* specified. If neither is specified, Application Default Credentials will
* be used. <br>
* **Default**: ""
* - `vfs.gcs.impersonate_service_account` <br>
* Set the GCS service account to impersonate. A chain of impersonated
* accounts can be formed by specifying many service accounts, separated by a
Expand Down
9 changes: 9 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ const std::string Config::VFS_AZURE_RETRY_DELAY_MS = "800";
const std::string Config::VFS_AZURE_MAX_RETRY_DELAY_MS = "60000";
const std::string Config::VFS_GCS_ENDPOINT = "";
const std::string Config::VFS_GCS_PROJECT_ID = "";
const std::string Config::VFS_GCS_SERVICE_ACCOUNT_KEY = "";
const std::string Config::VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION = "";
const std::string Config::VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT = "";
const std::string Config::VFS_GCS_MAX_PARALLEL_OPS =
Config::SM_IO_CONCURRENCY_LEVEL;
Expand Down Expand Up @@ -421,6 +423,11 @@ const std::map<std::string, std::string> default_config_values = {
"vfs.azure.max_retry_delay_ms", Config::VFS_AZURE_MAX_RETRY_DELAY_MS),
std::make_pair("vfs.gcs.endpoint", Config::VFS_GCS_ENDPOINT),
std::make_pair("vfs.gcs.project_id", Config::VFS_GCS_PROJECT_ID),
std::make_pair(
"vfs.gcs.service_account_key", Config::VFS_GCS_SERVICE_ACCOUNT_KEY),
std::make_pair(
"vfs.gcs.workload_identity_configuration",
Config::VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION),
std::make_pair(
"vfs.gcs.impersonate_service_account",
Config::VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT),
Expand Down Expand Up @@ -513,6 +520,8 @@ const std::set<std::string> Config::unserialized_params_ = {
"vfs.s3.aws_external_id",
"vfs.s3.aws_load_frequency",
"vfs.s3.aws_session_name",
"vfs.gcs.service_account_key",
"vfs.gcs.workload_identity_configuration",
"vfs.gcs.impersonate_service_account",
"rest.username",
"rest.password",
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,12 @@ class Config {
/** GCS service account(s) to impersonate. */
static const std::string VFS_GCS_IMPERSONATE_SERVICE_ACCOUNT;

/** GCS service account key JSON string. */
static const std::string VFS_GCS_SERVICE_ACCOUNT_KEY;

/** GCS external account credentials JSON string. */
static const std::string VFS_GCS_WORKLOAD_IDENTITY_CONFIGURATION;

/** GCS max parallel ops. */
static const std::string VFS_GCS_MAX_PARALLEL_OPS;

Expand Down
11 changes: 11 additions & 0 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,17 @@ class Config {
* - `vfs.gcs.project_id` <br>
* Set the GCS project id. <br>
* **Default**: ""
* - `vfs.gcs.service_account_key` <br>
* Set the JSON string with GCS service account key. Takes precedence
* over `vfs.gcs.workload_identity_configuration` if both are specified. If
* neither is specified, Application Default Credentials will be used. <br>
* **Default**: ""
* - `vfs.gcs.workload_identity_configuration` <br>
* Set the JSON string with Workload Identity Federation credentials.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: credentials to configuration here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

* `vfs.gcs.service_account_key` takes precedence over this if both are
* specified. If neither is specified, Application Default Credentials will
* be used. <br>
* **Default**: ""
* - `vfs.gcs.impersonate_service_account` <br>
* Set the GCS service account to impersonate. A chain of impersonated
* accounts can be formed by specifying many service accounts, separated by
Expand Down
18 changes: 17 additions & 1 deletion tiledb/sm/filesystem/gcs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ Status GCS::init(const Config& config, ThreadPool* const thread_pool) {
}
project_id_ = config.get("vfs.gcs.project_id", &found);
assert(found);
service_account_key_ = config.get("vfs.gcs.service_account_key", &found);
assert(found);
workload_identity_configuration_ =
config.get("vfs.gcs.workload_identity_configuration", &found);
assert(found);
impersonate_service_account_ =
config.get("vfs.gcs.impersonate_service_account", &found);
assert(found);
Expand Down Expand Up @@ -187,7 +192,18 @@ static shared_ptr<google::cloud::Credentials> apply_impersonation(
std::shared_ptr<google::cloud::Credentials> GCS::make_credentials(
const google::cloud::Options& options) const {
shared_ptr<google::cloud::Credentials> creds = nullptr;
if (!endpoint_.empty() || getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) {
if (!service_account_key_.empty()) {
if (!workload_identity_configuration_.empty()) {
LOG_WARN(
"Both GCS service account credentials and external account "
"credentials were specified; picking the former");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

another string to update with the new terminology (maybe the exact config key name?). I also might phrase this as something like "service account key set; ignoring workload identity configuration" to make it a little clearer (at least in my opinion; you are of course free to leave it as-is).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

}
creds = google::cloud::MakeServiceAccountCredentials(
service_account_key_, options);
} else if (!workload_identity_configuration_.empty()) {
creds = google::cloud::MakeExternalAccountCredentials(
workload_identity_configuration_, options);
} else if (!endpoint_.empty() || getenv("CLOUD_STORAGE_EMULATOR_ENDPOINT")) {
creds = google::cloud::MakeInsecureCredentials();
} else {
creds = google::cloud::MakeGoogleDefaultCredentials(options);
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/filesystem/gcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,12 @@ class GCS {
// The GCS project id.
std::string project_id_;

// The GCS service account credentials JSON string.
std::string service_account_key_;

// The GCS external account credentials JSON string.
std::string workload_identity_configuration_;

// A comma-separated list with the GCS service accounts to impersonate.
std::string impersonate_service_account_;

Expand Down
Loading