Skip to content

Commit

Permalink
Various cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Nov 3, 2024
1 parent 20bf5df commit 452185c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 62 deletions.
38 changes: 18 additions & 20 deletions cpp/src/arrow/filesystem/s3_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ const char* kEnvConnectString = "ARROW_TEST_S3_CONNECT_STRING";
const char* kEnvAccessKey = "ARROW_TEST_S3_ACCESS_KEY";
const char* kEnvSecretKey = "ARROW_TEST_S3_SECRET_KEY";

std::string GenerateConnectString() { return GetListenAddress(); }

} // namespace

struct MinioTestServer::Impl {
Expand Down Expand Up @@ -115,7 +113,7 @@ Status MinioTestServer::GenerateCertificateFile() {
return Status::OK();
}

Status MinioTestServer::Start(bool enable_tls_if_supported) {
Status MinioTestServer::Start(bool enable_tls) {
const char* connect_str = std::getenv(kEnvConnectString);
const char* access_key = std::getenv(kEnvAccessKey);
const char* secret_key = std::getenv(kEnvSecretKey);
Expand All @@ -134,25 +132,23 @@ Status MinioTestServer::Start(bool enable_tls_if_supported) {
impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey);
// Disable the embedded console (one less listening address to care about)
impl_->server_process_->SetEnv("MINIO_BROWSER", "off");
impl_->connect_string_ = GenerateConnectString();
// NOTE: --quiet makes startup faster by suppressing remote version check
std::vector<std::string> minio_args({"server", "--quiet", "--compat"});
if (enable_tls_if_supported) {
#ifdef MINIO_SERVER_WITH_TLS
if (enable_tls) {
ARROW_RETURN_NOT_OK(GenerateCertificateFile());
minio_args.emplace_back("--certs-dir");
minio_args.emplace_back(ca_dir_path());
impl_->scheme_ = "https";
impl_->connect_string_ =
GetListenAddress("localhost"); // for TLS enabled case, we need to use localhost
// which is the fixed hostname in the certificate
#endif // MINIO_SERVER_WITH_TLS
// With TLS enabled, we need the connection hostname to match the certificate's
// subject name. This also constrains the actual listening IP address.
impl_->connect_string_ = GetListenAddress("localhost");
} else {
// Without TLS enabled, we want to minimize the likelihood of address collisions
// by varying the listening IP address (note that most tests don't enable TLS).
impl_->connect_string_ = GetListenAddress();
}
minio_args.emplace_back("--address");
minio_args.emplace_back(
impl_->connect_string_); // the connect_string_ differs for the http and https,
// https is fixed localhost while http is the dynamic ip
// in range 127.0.0.1/8
minio_args.emplace_back(impl_->connect_string_);
minio_args.emplace_back(impl_->temp_dir_->path().ToString());

ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName));
Expand All @@ -168,27 +164,29 @@ Status MinioTestServer::Stop() {

struct MinioTestEnvironment::Impl {
std::function<Future<std::shared_ptr<MinioTestServer>>()> server_generator_;
bool enable_tls_;

explicit Impl(bool enable_tls) : enable_tls_(enable_tls) {}

Result<std::shared_ptr<MinioTestServer>> LaunchOneServer() {
auto server = std::make_shared<MinioTestServer>();
RETURN_NOT_OK(server->Start());
RETURN_NOT_OK(server->Start(enable_tls_));
return server;
}
};

MinioTestEnvironment::MinioTestEnvironment(bool enable_tls_if_supported)
: impl_(new Impl), enable_tls_if_supported_(enable_tls_if_supported) {}
MinioTestEnvironment::MinioTestEnvironment(bool enable_tls)
: impl_(new Impl(enable_tls)) {}

MinioTestEnvironment::~MinioTestEnvironment() = default;

void MinioTestEnvironment::SetUp() {
auto pool = ::arrow::internal::GetCpuThreadPool();

auto launch_one_server =
[enable_tls_if_supported =
enable_tls_if_supported_]() -> Result<std::shared_ptr<MinioTestServer>> {
[enable_tls = impl_->enable_tls_]() -> Result<std::shared_ptr<MinioTestServer>> {
auto server = std::make_shared<MinioTestServer>();
RETURN_NOT_OK(server->Start(enable_tls_if_supported));
RETURN_NOT_OK(server->Start(enable_tls));
return server;
};
impl_->server_generator_ = [pool, launch_one_server]() {
Expand Down
12 changes: 2 additions & 10 deletions cpp/src/arrow/filesystem/s3_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"

#if defined(__linux__)
# define MINIO_SERVER_WITH_TLS
#endif // Linux

namespace arrow {
namespace fs {

Expand All @@ -44,10 +40,7 @@ class MinioTestServer {
MinioTestServer();
~MinioTestServer();

// enable_tls_if_supported = true: start Minio with TLS if MINIO_SERVER_WITH_TLS is
// defined, Currently only enabled on Linux platfrom. enable_tls_if_supported = false:
// start Minio without TLS in all platfroms
Status Start(bool enable_tls_if_supported = true);
Status Start(bool enable_tls = false);

Status Stop();

Expand All @@ -74,7 +67,7 @@ class MinioTestServer {

class MinioTestEnvironment : public ::testing::Environment {
public:
explicit MinioTestEnvironment(bool enable_tls_if_supported = true);
explicit MinioTestEnvironment(bool enable_tls = false);
~MinioTestEnvironment();

void SetUp() override;
Expand All @@ -84,7 +77,6 @@ class MinioTestEnvironment : public ::testing::Environment {
protected:
struct Impl;
std::unique_ptr<Impl> impl_;
bool enable_tls_if_supported_ = true; // by default, enable TLS if supported
};

// A global test "environment", to ensure that the S3 API is initialized before
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/filesystem/s3fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ struct ARROW_EXPORT S3Options {
/// If empty, global filesystem options will be used (see FileSystemGlobalOptions);
/// if the corresponding global filesystem option is also empty, the underlying
/// TLS library's defaults will be used.
///
/// Note this option may be ignored on some systems (Windows, macOS).
std::string tls_ca_file_path;

/// Optional path to a directory holding TLS CA
Expand All @@ -216,6 +218,8 @@ struct ARROW_EXPORT S3Options {
/// If empty, global filesystem options will be used (see FileSystemGlobalOptions);
/// if the corresponding global filesystem option is also empty, the underlying
/// TLS library's defaults will be used.
///
/// Note this option may be ignored on some systems (Windows, macOS).
std::string tls_ca_dir_path;

/// Whether to verify the S3 endpoint's TLS certificate
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/filesystem/s3fs_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class MinioFixture : public benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& state) override {
minio_.reset(new MinioTestServer());
ASSERT_OK(minio_->Start(/*enable_tls_if_supported=*/false));
ASSERT_OK(minio_->Start(/*enable_tls=*/false));

const char* region_str = std::getenv(kEnvAwsRegion);
if (region_str) {
Expand Down
75 changes: 44 additions & 31 deletions cpp/src/arrow/filesystem/s3fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
#include "arrow/util/range.h"
#include "arrow/util/string.h"

// TLS tests require the ability to set a custom CA file when initiating S3 client
// connections, which the AWS SDK currently only supports on Linux.
#if defined(__linux__)
# define ENABLE_TLS_TESTS
#endif // Linux

namespace arrow {
namespace fs {

Expand All @@ -95,14 +101,14 @@ ::testing::Environment* s3_env = ::testing::AddGlobalTestEnvironment(new S3Envir
::testing::Environment* minio_env =
::testing::AddGlobalTestEnvironment(new MinioTestEnvironment);

::testing::Environment* minio_env_http = ::testing::AddGlobalTestEnvironment(
new MinioTestEnvironment(/*enable_tls_if_supported=*/false));
::testing::Environment* minio_env_https =
::testing::AddGlobalTestEnvironment(new MinioTestEnvironment(/*enable_tls=*/true));

MinioTestEnvironment* GetMinioEnv(bool enable_tls_if_supported) {
if (enable_tls_if_supported) {
return ::arrow::internal::checked_cast<MinioTestEnvironment*>(minio_env);
MinioTestEnvironment* GetMinioEnv(bool enable_tls) {
if (enable_tls) {
return ::arrow::internal::checked_cast<MinioTestEnvironment*>(minio_env_https);
} else {
return ::arrow::internal::checked_cast<MinioTestEnvironment*>(minio_env_http);
return ::arrow::internal::checked_cast<MinioTestEnvironment*>(minio_env);
}
}

Expand Down Expand Up @@ -182,24 +188,6 @@ class AwsTestMixin : public ::testing::Test {
#endif
};

// Test the CalculateSSECustomerKeyMD5 in AwsTestMixin fixture, since there is AWS SDK
// function call in it
TEST_F(AwsTestMixin, CalculateSSECustomerKeyMD5) {
ASSERT_RAISES(Invalid, CalculateSSECustomerKeyMD5("")); // invalid length
ASSERT_RAISES(Invalid,
CalculateSSECustomerKeyMD5(
"1234567890123456789012345678901234567890")); // invalid length
// valid case, with some non-ASCII character and a null byte in the sse_customer_key
char sse_customer_key[32] = {};
sse_customer_key[0] = '\x40'; // '@' character
sse_customer_key[1] = '\0'; // null byte
sse_customer_key[2] = '\xFF'; // non-ASCII
sse_customer_key[31] = '\xFA'; // non-ASCII
std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key));
ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECustomerKeyMD5(sse_customer_key_string))
ASSERT_EQ(md5, "97FTa6lj0hE7lshKdBy61g=="); // valid case
}

class S3TestMixin : public AwsTestMixin {
public:
void SetUp() override {
Expand Down Expand Up @@ -228,7 +216,7 @@ class S3TestMixin : public AwsTestMixin {

protected:
Status InitServerAndClient() {
ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv(enable_tls_if_supported_)->GetOneServer());
ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv(enable_tls_)->GetOneServer());
client_config_.reset(new Aws::Client::ClientConfiguration());
client_config_->endpointOverride = ToAwsString(minio_->connect_string());
if (minio_->scheme() == "https") {
Expand All @@ -255,9 +243,11 @@ class S3TestMixin : public AwsTestMixin {
std::unique_ptr<Aws::Client::ClientConfiguration> client_config_;
Aws::Auth::AWSCredentials credentials_;
std::unique_ptr<Aws::S3::S3Client> client_;
bool enable_tls_if_supported_ =
false; // by default, use the HTTP for all the test cases, since there is the
// random failure when there is stress test against the minio HTTPS Server
// Use plain HTTP by default, as this allows us to listen on different loopback
// addresses and thus minimize the risk of address reuse (HTTPS requires the
// hostname to match the certificate's subject name, constraining us to a
// single loopback address).
bool enable_tls_ = false;
};

void AssertGetObject(Aws::S3::Model::GetObjectResult& result,
Expand All @@ -283,6 +273,27 @@ void AssertObjectContents(Aws::S3::S3Client* client, const std::string& bucket,
AssertGetObject(result, expected);
}

////////////////////////////////////////////////////////////////////////////
// Misc tests

class InternalsTest : public AwsTestMixin {};

TEST_F(InternalsTest, CalculateSSECustomerKeyMD5) {
ASSERT_RAISES(Invalid, CalculateSSECustomerKeyMD5("")); // invalid length
ASSERT_RAISES(Invalid,
CalculateSSECustomerKeyMD5(
"1234567890123456789012345678901234567890")); // invalid length
// valid case, with some non-ASCII character and a null byte in the sse_customer_key
char sse_customer_key[32] = {};
sse_customer_key[0] = '\x40'; // '@' character
sse_customer_key[1] = '\0'; // null byte
sse_customer_key[2] = '\xFF'; // non-ASCII
sse_customer_key[31] = '\xFA'; // non-ASCII
std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key));
ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECustomerKeyMD5(sse_customer_key_string))
ASSERT_EQ(md5, "97FTa6lj0hE7lshKdBy61g=="); // valid case
}

////////////////////////////////////////////////////////////////////////////
// S3Options tests

Expand Down Expand Up @@ -1344,11 +1355,11 @@ TEST_F(TestS3FS, OpenInputFile) {
}

// Minio only allows Server Side Encryption on HTTPS client connections.
#ifdef MINIO_SERVER_WITH_TLS
#ifdef ENABLE_TLS_TESTS
class TestS3FSHTTPS : public TestS3FS {
public:
void SetUp() override {
enable_tls_if_supported_ = true;
enable_tls_ = true;
TestS3FS::SetUp();
}
};
Expand All @@ -1374,6 +1385,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyMatch) {
TEST_F(TestS3FSHTTPS, SSECustomerKeyMismatch) {
std::shared_ptr<io::OutputStream> stream;
for (const auto& allow_delayed_open : {false, true}) {
ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open);
options_.allow_delayed_open = allow_delayed_open;
options_.sse_customer_key = "12345678123456781234567812345678";
MakeFileSystem();
Expand All @@ -1390,6 +1402,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyMismatch) {
TEST_F(TestS3FSHTTPS, SSECustomerKeyMissing) {
std::shared_ptr<io::OutputStream> stream;
for (const auto& allow_delayed_open : {false, true}) {
ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open);
options_.allow_delayed_open = allow_delayed_open;
options_.sse_customer_key = "12345678123456781234567812345678";
MakeFileSystem();
Expand All @@ -1415,7 +1428,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyCopyFile) {
AssertBufferEqual(*buf, "some");
ASSERT_OK(RestoreTestBucket());
}
#endif // MINIO_SERVER_WITH_TLS
#endif // ENABLE_TLS_TESTS

struct S3OptionsTestParameters {
bool background_writes{false};
Expand Down

0 comments on commit 452185c

Please sign in to comment.