From cf8c30fd3059c6b8f7668640c70fccf522838e98 Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Wed, 25 Sep 2024 05:21:55 +0530 Subject: [PATCH 1/7] use object_store 0.10.2 patch with SSE-C --- Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 156820ec5..0d65fea62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,6 @@ [workspace] members = ["server"] resolver = "2" + +[patch.crates-io] +object_store = { git = "https://github.com/MihirLuthra/arrow-rs.git", branch = "mihir/I-919-0.10.2-with-sse-c" } From 3ffa7c3f810e433f5e042d63d28039cb01c987cf Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Wed, 25 Sep 2024 05:22:47 +0530 Subject: [PATCH 2/7] add --object-sse to allow using SSE-C --- server/src/storage/s3.rs | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 0627100a2..0b2d5701a 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -32,8 +32,10 @@ use object_store::{ClientOptions, ObjectStore, PutPayload}; use relative_path::{RelativePath, RelativePathBuf}; use std::collections::BTreeMap; +use std::fmt::Display; use std::iter::Iterator; use std::path::Path as StdPath; +use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -84,6 +86,12 @@ pub struct S3Config { #[arg(long, env = "P_S3_BUCKET", value_name = "bucket-name", required = true)] pub bucket_name: String, + /// Server side encryption to use for operations with objects. + /// Currently, this only supports SSE-C. Value should be + /// like AES256:. + #[arg(long, env = "P_OBJECT_SSE", value_name = "object-sse")] + pub object_sse: Option, + /// Set client to send checksum header on every put request #[arg( long, @@ -130,6 +138,67 @@ pub struct S3Config { pub metadata_endpoint: Option, } +/// This represents the server side encryption to be +/// used when working with S3 objects. +#[derive(Debug, Clone)] +pub enum ObjectSse { + /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html + SseC { + // algorithm unused but being tracked separately to maintain + // consistent interface via CLI if AWS adds any new algorithms + // in future. + _algorithm: ObjectEncryptionAlgorithm, + base64_encryption_key: String, + }, +} + +impl FromStr for ObjectSse { + type Err = String; + + fn from_str(s: &str) -> Result { + let parts = s.split(':').collect::>(); + if parts.len() == 2 { + let algorithm = parts[0]; + let encryption_key = parts[1]; + + let alg = ObjectEncryptionAlgorithm::from_str(algorithm)?; + + Ok(match alg { + ObjectEncryptionAlgorithm::Aes256 => ObjectSse::SseC { + _algorithm: alg, + base64_encryption_key: encryption_key.to_owned(), + }, + }) + } else { + Err("Expected :".into()) + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum ObjectEncryptionAlgorithm { + Aes256, +} + +impl FromStr for ObjectEncryptionAlgorithm { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "AES256" => Ok(ObjectEncryptionAlgorithm::Aes256), + _ => Err("Invalid SSE algorithm. Following are supported: AES256".into()), + } + } +} + +impl Display for ObjectEncryptionAlgorithm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ObjectEncryptionAlgorithm::Aes256 => write!(f, "AES256"), + } + } +} + impl S3Config { fn get_default_builder(&self) -> AmazonS3Builder { let mut client_options = ClientOptions::default() @@ -160,6 +229,14 @@ impl S3Config { .with_secret_access_key(secret_key); } + if let Some(object_sse) = &self.object_sse { + match object_sse { + ObjectSse::SseC { _algorithm, base64_encryption_key } => { + builder = builder.with_ssec_encryption(base64_encryption_key); + }, + } + } + if let Ok(relative_uri) = std::env::var(AWS_CONTAINER_CREDENTIALS_RELATIVE_URI) { builder = builder.with_config( AmazonS3ConfigKey::ContainerCredentialsRelativeUri, From f0e1ec68a6ec45c69806726b2ebef6e07dc2604b Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Wed, 25 Sep 2024 17:11:39 +0530 Subject: [PATCH 3/7] cargo fmt --- server/src/storage/s3.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 0b2d5701a..d6d93f7d2 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -231,9 +231,12 @@ impl S3Config { if let Some(object_sse) = &self.object_sse { match object_sse { - ObjectSse::SseC { _algorithm, base64_encryption_key } => { + ObjectSse::SseC { + _algorithm, + base64_encryption_key, + } => { builder = builder.with_ssec_encryption(base64_encryption_key); - }, + } } } From e5e589fb6454d72227ecef8d5c9da063536e6ed8 Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Fri, 27 Sep 2024 02:28:25 +0530 Subject: [PATCH 4/7] specify SSE-C in --object-sse as well --- server/src/storage/s3.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index d6d93f7d2..3133b5c3e 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -88,7 +88,7 @@ pub struct S3Config { /// Server side encryption to use for operations with objects. /// Currently, this only supports SSE-C. Value should be - /// like AES256:. + /// like SSE-C:AES256:. #[arg(long, env = "P_OBJECT_SSE", value_name = "object-sse")] pub object_sse: Option, @@ -157,9 +157,14 @@ impl FromStr for ObjectSse { fn from_str(s: &str) -> Result { let parts = s.split(':').collect::>(); - if parts.len() == 2 { - let algorithm = parts[0]; - let encryption_key = parts[1]; + if parts.len() == 3 { + let sse_type = parts[0]; + if sse_type != "SSE-C" { + return Err("Only SSE-C is supported for object encryption for now".into()); + } + + let algorithm = parts[1]; + let encryption_key = parts[2]; let alg = ObjectEncryptionAlgorithm::from_str(algorithm)?; @@ -170,7 +175,7 @@ impl FromStr for ObjectSse { }, }) } else { - Err("Expected :".into()) + Err("Expected SSE-C:AES256:".into()) } } } From b05100bf824a5dd1e9ccb5a0ab66f00ed1970498 Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Wed, 9 Oct 2024 23:30:14 +0530 Subject: [PATCH 5/7] use arrow-rs fork in parseablehq --- Cargo.lock | 11 +++++------ Cargo.toml | 6 +++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 641ff87f5..ee3f33cf5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2962,8 +2962,7 @@ dependencies = [ [[package]] name = "object_store" version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +source = "git+https://github.com/parseablehq/arrow-rs.git?rev=23b6ff9f432e8e29c08d47a315ba0b7cb8758225#23b6ff9f432e8e29c08d47a315ba0b7cb8758225" dependencies = [ "async-trait", "base64 0.22.0", @@ -3447,8 +3446,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", - "heck 0.5.0", - "itertools 0.13.0", + "heck 0.4.1", + "itertools 0.12.1", "log", "multimap", "once_cell", @@ -3468,7 +3467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.12.1", "proc-macro2", "quote", "syn 2.0.79", @@ -4212,7 +4211,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.79", diff --git a/Cargo.toml b/Cargo.toml index 0d65fea62..3c626482e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,4 +3,8 @@ members = ["server"] resolver = "2" [patch.crates-io] -object_store = { git = "https://github.com/MihirLuthra/arrow-rs.git", branch = "mihir/I-919-0.10.2-with-sse-c" } +# object_store added support for SSE-C headers in: +# - https://github.com/apache/arrow-rs/pull/6230 +# - https://github.com/apache/arrow-rs/pull/6260 +# But a new version hasn't been published to crates.io for this yet. So, we are using this patch temporarily. +object_store = { git = "https://github.com/parseablehq/arrow-rs.git", rev = "23b6ff9f432e8e29c08d47a315ba0b7cb8758225" } From 9ec51e08c0ecafee36110b78b1526330639707d9 Mon Sep 17 00:00:00 2001 From: Mihir Luthra Date: Thu, 10 Oct 2024 14:57:35 +0530 Subject: [PATCH 6/7] use apache/arrow-rs commit with SSEC changes --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee3f33cf5..440e5977f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2962,7 +2962,7 @@ dependencies = [ [[package]] name = "object_store" version = "0.11.0" -source = "git+https://github.com/parseablehq/arrow-rs.git?rev=23b6ff9f432e8e29c08d47a315ba0b7cb8758225#23b6ff9f432e8e29c08d47a315ba0b7cb8758225" +source = "git+https://github.com/apache/arrow-rs.git?rev=23b6ff9f432e8e29c08d47a315ba0b7cb8758225#23b6ff9f432e8e29c08d47a315ba0b7cb8758225" dependencies = [ "async-trait", "base64 0.22.0", @@ -3446,7 +3446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", - "heck 0.4.1", + "heck 0.5.0", "itertools 0.12.1", "log", "multimap", @@ -4211,7 +4211,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.79", diff --git a/Cargo.toml b/Cargo.toml index 3c626482e..0e263dc4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,4 @@ resolver = "2" # - https://github.com/apache/arrow-rs/pull/6230 # - https://github.com/apache/arrow-rs/pull/6260 # But a new version hasn't been published to crates.io for this yet. So, we are using this patch temporarily. -object_store = { git = "https://github.com/parseablehq/arrow-rs.git", rev = "23b6ff9f432e8e29c08d47a315ba0b7cb8758225" } +object_store = { git = "https://github.com/apache/arrow-rs.git", rev = "23b6ff9f432e8e29c08d47a315ba0b7cb8758225" } From 8858e9e71a150cb3d0e7c2d7c2d7f6d731edc8a9 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Sat, 12 Oct 2024 18:11:41 +0530 Subject: [PATCH 7/7] renamed env var P_OBJECT_SSE to P_S3_SSEC_ENCRYPTION_KEY --- server/src/storage/s3.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 3133b5c3e..0c9eb982e 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -89,8 +89,12 @@ pub struct S3Config { /// Server side encryption to use for operations with objects. /// Currently, this only supports SSE-C. Value should be /// like SSE-C:AES256:. - #[arg(long, env = "P_OBJECT_SSE", value_name = "object-sse")] - pub object_sse: Option, + #[arg( + long, + env = "P_S3_SSEC_ENCRYPTION_KEY", + value_name = "ssec-encryption-key" + )] + pub ssec_encryption_key: Option, /// Set client to send checksum header on every put request #[arg( @@ -141,7 +145,7 @@ pub struct S3Config { /// This represents the server side encryption to be /// used when working with S3 objects. #[derive(Debug, Clone)] -pub enum ObjectSse { +pub enum SSECEncryptionKey { /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html SseC { // algorithm unused but being tracked separately to maintain @@ -152,7 +156,7 @@ pub enum ObjectSse { }, } -impl FromStr for ObjectSse { +impl FromStr for SSECEncryptionKey { type Err = String; fn from_str(s: &str) -> Result { @@ -169,7 +173,7 @@ impl FromStr for ObjectSse { let alg = ObjectEncryptionAlgorithm::from_str(algorithm)?; Ok(match alg { - ObjectEncryptionAlgorithm::Aes256 => ObjectSse::SseC { + ObjectEncryptionAlgorithm::Aes256 => SSECEncryptionKey::SseC { _algorithm: alg, base64_encryption_key: encryption_key.to_owned(), }, @@ -234,9 +238,9 @@ impl S3Config { .with_secret_access_key(secret_key); } - if let Some(object_sse) = &self.object_sse { - match object_sse { - ObjectSse::SseC { + if let Some(ssec_encryption_key) = &self.ssec_encryption_key { + match ssec_encryption_key { + SSECEncryptionKey::SseC { _algorithm, base64_encryption_key, } => {