From 7ad870c5b99a31706dd307028d196cc7e53df125 Mon Sep 17 00:00:00 2001 From: KiterLuc <67824247+KiterLuc@users.noreply.github.com> Date: Tue, 11 Jun 2024 23:10:46 +0200 Subject: [PATCH] [Backport-release-2.24] Support setting S3 storage class. (#5053) (#5068) Backport https://github.com/TileDB-Inc/TileDB/commit/875f8f45d77a8fcafbcb1e06faf88c9a9e569e8c from https://github.com/TileDB-Inc/TileDB/pull/5053. --- TYPE: CONFIG DESC: Add `vfs.s3.storage_class` config option to set the storage class of newly uploaded S3 objects. Co-authored-by: Theodore Tsirpanis --- test/src/unit-capi-config.cc | 4 ++ test/src/unit-cppapi-config.cc | 2 +- tiledb/api/c_api/config/config_api_external.h | 16 ++++++ tiledb/sm/config/config.cc | 2 + tiledb/sm/config/config.h | 3 + tiledb/sm/cpp_api/config.h | 17 ++++++ tiledb/sm/filesystem/s3.cc | 57 +++++++++++++++++++ tiledb/sm/filesystem/s3.h | 8 +++ 8 files changed, 108 insertions(+), 1 deletion(-) diff --git a/test/src/unit-capi-config.cc b/test/src/unit-capi-config.cc index ca4269d88a8..5f8abc0260d 100644 --- a/test/src/unit-capi-config.cc +++ b/test/src/unit-capi-config.cc @@ -340,6 +340,7 @@ void check_save_to_file() { ss << "vfs.s3.requester_pays false\n"; ss << "vfs.s3.scheme https\n"; ss << "vfs.s3.skip_init false\n"; + ss << "vfs.s3.storage_class NOT_SET\n"; ss << "vfs.s3.use_multipart_upload true\n"; ss << "vfs.s3.use_virtual_addressing true\n"; ss << "vfs.s3.verify_ssl true\n"; @@ -739,6 +740,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { all_param_values["vfs.s3.connect_scale_factor"] = "25"; all_param_values["vfs.s3.sse"] = ""; all_param_values["vfs.s3.sse_kms_key_id"] = ""; + all_param_values["vfs.s3.storage_class"] = "NOT_SET"; all_param_values["vfs.s3.logging_level"] = "Off"; all_param_values["vfs.s3.request_timeout_ms"] = "3000"; all_param_values["vfs.s3.requester_pays"] = "false"; @@ -812,6 +814,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { vfs_param_values["s3.connect_scale_factor"] = "25"; 
vfs_param_values["s3.sse"] = ""; vfs_param_values["s3.sse_kms_key_id"] = ""; + vfs_param_values["s3.storage_class"] = "NOT_SET"; vfs_param_values["s3.logging_level"] = "Off"; vfs_param_values["s3.request_timeout_ms"] = "3000"; vfs_param_values["s3.requester_pays"] = "false"; @@ -880,6 +883,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { s3_param_values["connect_scale_factor"] = "25"; s3_param_values["sse"] = ""; s3_param_values["sse_kms_key_id"] = ""; + s3_param_values["storage_class"] = "NOT_SET"; s3_param_values["logging_level"] = "Off"; s3_param_values["request_timeout_ms"] = "3000"; s3_param_values["requester_pays"] = "false"; diff --git a/test/src/unit-cppapi-config.cc b/test/src/unit-cppapi-config.cc index d84e9996b1b..b6e5d6d1151 100644 --- a/test/src/unit-cppapi-config.cc +++ b/test/src/unit-cppapi-config.cc @@ -75,7 +75,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") { names.push_back(it->first); } // Check number of VFS params in default config object. - CHECK(names.size() == 68); + CHECK(names.size() == 69); } TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") { diff --git a/tiledb/api/c_api/config/config_api_external.h b/tiledb/api/c_api/config/config_api_external.h index 5e0e8c6b312..161a5a75ae6 100644 --- a/tiledb/api/c_api/config/config_api_external.h +++ b/tiledb/api/c_api/config/config_api_external.h @@ -569,6 +569,22 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT; * The server-side encryption key to use if * vfs.s3.sse == "kms" (AWS key management service).
* **Default**: "" + * - `vfs.s3.storage_class`
+ * The storage class to use for the newly uploaded S3 objects. The set of + * accepted values is found in the Aws::S3::Model::StorageClass enumeration. + * "NOT_SET" + * "STANDARD" + * "REDUCED_REDUNDANCY" + * "STANDARD_IA" + * "ONEZONE_IA" + * "INTELLIGENT_TIERING" + * "GLACIER" + * "DEEP_ARCHIVE" + * "OUTPOSTS" + * "GLACIER_IR" + * "SNOW" + * "EXPRESS_ONEZONE"
+ * **Default**: "NOT_SET" * - `vfs.s3.bucket_canned_acl`
* Names of values found in Aws::S3::Model::BucketCannedACL enumeration. * "NOT_SET" diff --git a/tiledb/sm/config/config.cc b/tiledb/sm/config/config.cc index 69ce8565a85..4a5e677295c 100644 --- a/tiledb/sm/config/config.cc +++ b/tiledb/sm/config/config.cc @@ -219,6 +219,7 @@ const std::string Config::VFS_S3_CONNECT_MAX_TRIES = "5"; const std::string Config::VFS_S3_CONNECT_SCALE_FACTOR = "25"; const std::string Config::VFS_S3_SSE = ""; const std::string Config::VFS_S3_SSE_KMS_KEY_ID = ""; +const std::string Config::VFS_S3_STORAGE_CLASS = "NOT_SET"; const std::string Config::VFS_S3_REQUEST_TIMEOUT_MS = "3000"; const std::string Config::VFS_S3_REQUESTER_PAYS = "false"; const std::string Config::VFS_S3_PROXY_SCHEME = "http"; @@ -482,6 +483,7 @@ const std::map default_config_values = { "vfs.s3.connect_scale_factor", Config::VFS_S3_CONNECT_SCALE_FACTOR), std::make_pair("vfs.s3.sse", Config::VFS_S3_SSE), std::make_pair("vfs.s3.sse_kms_key_id", Config::VFS_S3_SSE_KMS_KEY_ID), + std::make_pair("vfs.s3.storage_class", Config::VFS_S3_STORAGE_CLASS), std::make_pair( "vfs.s3.request_timeout_ms", Config::VFS_S3_REQUEST_TIMEOUT_MS), std::make_pair("vfs.s3.requester_pays", Config::VFS_S3_REQUESTER_PAYS), diff --git a/tiledb/sm/config/config.h b/tiledb/sm/config/config.h index 2fd464470e4..3dd6fa760f1 100644 --- a/tiledb/sm/config/config.h +++ b/tiledb/sm/config/config.h @@ -561,6 +561,9 @@ class Config { /** The S3 KMS key id for KMS server-side-encryption. */ static const std::string VFS_S3_SSE_KMS_KEY_ID; + /** The S3 storage class to upload objects to. */ + static const std::string VFS_S3_STORAGE_CLASS; + /** Request timeout in milliseconds. 
*/ static const std::string VFS_S3_REQUEST_TIMEOUT_MS; diff --git a/tiledb/sm/cpp_api/config.h b/tiledb/sm/cpp_api/config.h index 02d8105bc59..cd1c2110ad9 100644 --- a/tiledb/sm/cpp_api/config.h +++ b/tiledb/sm/cpp_api/config.h @@ -744,6 +744,23 @@ class Config { * The server-side encryption key to use if * vfs.s3.sse == "kms" (AWS key management service).
* **Default**: "" + * - `vfs.s3.storage_class`
+ * The storage class to use for the newly uploaded S3 objects. The set of + * accepted values is found in the Aws::S3::Model::StorageClass + * enumeration. + * "NOT_SET" + * "STANDARD" + * "REDUCED_REDUNDANCY" + * "STANDARD_IA" + * "ONEZONE_IA" + * "INTELLIGENT_TIERING" + * "GLACIER" + * "DEEP_ARCHIVE" + * "OUTPOSTS" + * "GLACIER_IR" + * "SNOW" + * "EXPRESS_ONEZONE"
+ * **Default**: "NOT_SET" * - `vfs.s3.bucket_canned_acl`
* Names of values found in Aws::S3::Model::BucketCannedACL enumeration. * "NOT_SET" diff --git a/tiledb/sm/filesystem/s3.cc b/tiledb/sm/filesystem/s3.cc index 708716f8891..957325ba7e6 100644 --- a/tiledb/sm/filesystem/s3.cc +++ b/tiledb/sm/filesystem/s3.cc @@ -81,6 +81,13 @@ using tiledb::common::filesystem::directory_entry; namespace { +/* + * Functions to convert strings to AWS enums. + * + * The AWS SDK provides some enum conversion functions, but they must not be + * used, because they have non-deterministic behavior in certain scenarios. + */ + Aws::Utils::Logging::LogLevel aws_log_name_to_level(std::string loglevel) { std::transform(loglevel.begin(), loglevel.end(), loglevel.begin(), ::tolower); if (loglevel == "fatal") @@ -157,6 +164,47 @@ Aws::S3::Model::BucketCannedACL S3_BucketCannedACL_from_str( return Aws::S3::Model::BucketCannedACL::NOT_SET; } +/** + * Return a S3 enum value for any recognized string or NOT_SET if + * the string is empty or does not match any of the enum values. + * + * @param storage_class_str A textual string naming one of the + * Aws::S3::Model::StorageClass enum members. 
+ */ +Aws::S3::Model::StorageClass S3_StorageClass_from_str( + const std::string& storage_class_str) { + using Aws::S3::Model::StorageClass; + if (storage_class_str.empty()) + return StorageClass::NOT_SET; + + if (storage_class_str == "NOT_SET") + return StorageClass::NOT_SET; + else if (storage_class_str == "STANDARD") + return StorageClass::STANDARD; + else if (storage_class_str == "REDUCED_REDUNDANCY") + return StorageClass::REDUCED_REDUNDANCY; + else if (storage_class_str == "STANDARD_IA") + return StorageClass::STANDARD_IA; + else if (storage_class_str == "ONEZONE_IA") + return StorageClass::ONEZONE_IA; + else if (storage_class_str == "INTELLIGENT_TIERING") + return StorageClass::INTELLIGENT_TIERING; + else if (storage_class_str == "GLACIER") + return StorageClass::GLACIER; + else if (storage_class_str == "DEEP_ARCHIVE") + return StorageClass::DEEP_ARCHIVE; + else if (storage_class_str == "OUTPOSTS") + return StorageClass::OUTPOSTS; + else if (storage_class_str == "GLACIER_IR") + return StorageClass::GLACIER_IR; + else if (storage_class_str == "SNOW") + return StorageClass::SNOW; + else if (storage_class_str == "EXPRESS_ONEZONE") + return StorageClass::EXPRESS_ONEZONE; + else + return StorageClass::NOT_SET; +} + } // namespace using namespace tiledb::common; @@ -200,6 +248,7 @@ S3::S3( s3_params_.requester_pays_ ? 
Aws::S3::Model::RequestPayer::requester : Aws::S3::Model::RequestPayer::NOT_SET) , sse_(Aws::S3::Model::ServerSideEncryption::NOT_SET) + , storage_class_(S3_StorageClass_from_str(s3_params_.storage_class_)) , object_canned_acl_( S3_ObjectCannedACL_from_str(s3_params_.object_acl_str_)) , bucket_canned_acl_( @@ -511,6 +560,10 @@ void S3::touch(const URI& uri) const { if (!s3_params_.sse_kms_key_id_.empty()) put_object_request.SetSSEKMSKeyId( Aws::String(s3_params_.sse_kms_key_id_.c_str())); + // TODO: These checks are not needed since AWS SDK 1.11.275 + // https://github.com/aws/aws-sdk-cpp/pull/2875 + if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET) + put_object_request.SetStorageClass(storage_class_); if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) { put_object_request.SetACL(object_canned_acl_); } @@ -1562,6 +1615,8 @@ Status S3::initiate_multipart_request( if (!s3_params_.sse_kms_key_id_.empty()) multipart_upload_request.SetSSEKMSKeyId( Aws::String(s3_params_.sse_kms_key_id_.c_str())); + if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET) + multipart_upload_request.SetStorageClass(storage_class_); if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) { multipart_upload_request.SetACL(object_canned_acl_); } @@ -1756,6 +1811,8 @@ void S3::write_direct(const URI& uri, const void* buffer, uint64_t length) { if (!s3_params_.sse_kms_key_id_.empty()) put_object_request.SetSSEKMSKeyId( Aws::String(s3_params_.sse_kms_key_id_.c_str())); + if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET) + put_object_request.SetStorageClass(storage_class_); if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) { put_object_request.SetACL(object_canned_acl_); } diff --git a/tiledb/sm/filesystem/s3.h b/tiledb/sm/filesystem/s3.h index 8c074479aee..dec4ed7e384 100644 --- a/tiledb/sm/filesystem/s3.h +++ b/tiledb/sm/filesystem/s3.h @@ -224,6 +224,8 @@ struct S3Parameters { sse_algorithm_ == "kms" ? 
config.get("vfs.s3.sse_kms_key_id").value() : "") + , storage_class_( + config.get("vfs.s3.storage_class", Config::must_find)) , bucket_acl_str_(config.get( "vfs.s3.bucket_canned_acl", Config::must_find)) , object_acl_str_(config.get( @@ -326,6 +328,9 @@ struct S3Parameters { /** The server-side encryption key to use with the kms algorithm. */ std::string sse_kms_key_id_; + /** The S3 storage class. */ + std::string storage_class_; + /** Names of values found in Aws::S3::Model::BucketCannedACL enumeration. */ std::string bucket_acl_str_; @@ -1378,6 +1383,9 @@ class S3 : FilesystemBase { /** The server-side encryption algorithm. */ Aws::S3::Model::ServerSideEncryption sse_; + /** The storage class for a s3 upload request. */ + Aws::S3::Model::StorageClass storage_class_; + /** Protects file_buffers map */ std::mutex file_buffers_mtx_;