Skip to content

Commit

Permalink
apacheGH-43746: [C++] Add support for Boost 1.86
Browse files Browse the repository at this point in the history
boost/process/*.hpp are deprecated since Boost 1.86. And it seems that
it also adds backward incompatible change. We need to use
boost/process/v2/*.hpp instead.

This introduces `arrow::util::Process` for testing. It wraps
boost/process/ API. So we don't need to use boost/process/ API
directly in our tests.
  • Loading branch information
kou committed Aug 22, 2024
1 parent 6a1d052 commit 97186d3
Show file tree
Hide file tree
Showing 12 changed files with 362 additions and 297 deletions.
18 changes: 11 additions & 7 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1182,7 +1182,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION
# boost/container_hash/hash.hpp and support for that was removed in clang 16
set(ARROW_BOOST_REQUIRED_VERSION "1.81")
elseif(ARROW_BUILD_TESTS)
set(ARROW_BOOST_REQUIRED_VERSION "1.64")
# For boost/process/v2
set(ARROW_BOOST_REQUIRED_VERSION "1.80")
else()
set(ARROW_BOOST_REQUIRED_VERSION "1.58")
endif()
Expand Down Expand Up @@ -1316,14 +1317,17 @@ if(ARROW_USE_BOOST)
endif()
endforeach()

if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# boost/process/detail/windows/handle_workaround.hpp doesn't work
# without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't
# provide __kernel_entry without winternl.h.
if(NOT TARGET Boost::process)
add_library(Boost::process INTERFACE IMPORTED)
target_link_libraries(Boost::process INTERFACE Boost::filesystem Boost::system
Boost::headers)
# Boost < 1.86 has a bug that
# boost::process::v2::process_environment::on_setup() isn't
# defined. We need to build Boost Process source to define it.
#
# See also:
# https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp
target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1")
# https://github.com/boostorg/process/issues/312
target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE")
endif()

message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}")
Expand Down
13 changes: 9 additions & 4 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -641,9 +641,13 @@ else()
endif()

set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST})
set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON)
set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static
${ARROW_GTEST_GTEST})
set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process)
set(ARROW_TESTING_STATIC_LINK_LIBS
arrow::flatbuffers
RapidJSON
Boost::process
arrow_static
${ARROW_GTEST_GTEST})
set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared)
set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static)
# that depend on gtest
Expand All @@ -664,9 +668,10 @@ set(ARROW_TESTING_SRCS
io/test_common.cc
ipc/test_common.cc
testing/fixed_width_test_util.cc
testing/generator.cc
testing/gtest_util.cc
testing/process.cc
testing/random.cc
testing/generator.cc
testing/util.cc)

#
Expand Down
18 changes: 4 additions & 14 deletions cpp/src/arrow/filesystem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,15 @@ if(ARROW_GCS)
EXTRA_LABELS
filesystem
EXTRA_LINK_LIBS
google-cloud-cpp::storage
Boost::filesystem
Boost::system)
google-cloud-cpp::storage)
endif()

if(ARROW_AZURE)
add_arrow_test(azurefs_test
EXTRA_LABELS
filesystem
EXTRA_LINK_LIBS
${AZURE_SDK_LINK_LIBRARIES}
Boost::filesystem
Boost::system)
${AZURE_SDK_LINK_LIBRARIES})
endif()

if(ARROW_S3)
Expand All @@ -75,11 +71,7 @@ if(ARROW_S3)
else()
list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static)
endif()
list(APPEND
ARROW_S3_TEST_EXTRA_LINK_LIBS
${AWSSDK_LINK_LIBRARIES}
Boost::filesystem
Boost::system)
list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
add_arrow_test(s3fs_test
SOURCES
s3fs_test.cc
Expand Down Expand Up @@ -122,9 +114,7 @@ if(ARROW_S3)
s3_test_util.cc
STATIC_LINK_LIBS
${AWSSDK_LINK_LIBRARIES}
${ARROW_BENCHMARK_LINK_LIBS}
Boost::filesystem
Boost::system)
${ARROW_BENCHMARK_LINK_LIBS})
if(ARROW_TEST_LINKAGE STREQUAL "static")
target_link_libraries(arrow-filesystem-s3fs-benchmark PRIVATE parquet_static)
else()
Expand Down
53 changes: 13 additions & 40 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,6 @@
// specific language governing permissions and limitations
// under the License.

#include <algorithm> // Missing include in boost/process

// This boost/asio/io_context.hpp include is needless for no MinGW
// build.
//
// This is for including boost/asio/detail/socket_types.hpp before any
// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't
// work if windows.h is already included. boost/process.h ->
// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp
// includes windows.h. boost/process/args.hpp is included before
// boost/process/async.h that includes
// boost/asio/detail/socket_types.hpp implicitly is included.
#include <boost/asio/io_context.hpp>
// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
#include <boost/process.hpp>

#include "arrow/filesystem/azurefs.h"
#include "arrow/filesystem/azurefs_internal.h"

Expand All @@ -53,6 +35,7 @@
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/process.h"
#include "arrow/testing/util.h"
#include "arrow/util/future.h"
#include "arrow/util/io_util.h"
Expand All @@ -67,7 +50,7 @@ namespace arrow {
using internal::TemporaryDir;
namespace fs {
using internal::ConcatAbstractPath;
namespace bp = boost::process;
// namespace bp = BOOST_PROCESS_V2_NAMESPACE;

using ::testing::IsEmpty;
using ::testing::Not;
Expand Down Expand Up @@ -174,42 +157,32 @@ class AzuriteEnv : public AzureEnvImpl<AzuriteEnv> {
private:
std::unique_ptr<TemporaryDir> temp_dir_;
arrow::internal::PlatformFilename debug_log_path_;
bp::child server_process_;
std::unique_ptr<util::Process> server_process_;

using AzureEnvImpl::AzureEnvImpl;

public:
static const AzureBackend kBackend = AzureBackend::kAzurite;

~AzuriteEnv() override {
server_process_.terminate();
server_process_.wait();
}
~AzuriteEnv() override { server_process_ = nullptr; }

static Result<std::unique_ptr<AzureEnvImpl>> Make() {
auto self = std::unique_ptr<AzuriteEnv>(
new AzuriteEnv("devstoreaccount1",
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
"K1SZFPTOtr/KBHBeksoGMGw=="));
auto exe_path = bp::search_path("azurite");
if (exe_path.empty()) {
return Status::Invalid("Could not find Azurite emulator.");
}
self->server_process_ = std::make_unique<util::Process>();
ARROW_RETURN_NOT_OK(self->server_process_->SetExecutable("azurite"));
ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-"));
ARROW_ASSIGN_OR_RAISE(self->debug_log_path_,
self->temp_dir_->path().Join("debug.log"));
auto server_process = bp::child(
boost::this_process::environment(), exe_path, "--silent", "--location",
self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString(),
// For old Azurite. We can't install the latest Azurite with
// old Node.js on old Ubuntu.
"--skipApiVersionCheck");
if (!server_process.valid() || !server_process.running()) {
server_process.terminate();
server_process.wait();
return Status::Invalid("Could not start Azurite emulator.");
}
self->server_process_ = std::move(server_process);
ARROW_RETURN_NOT_OK(self->server_process_->SetArgs(
{"--silent", "--location", self->temp_dir_->path().ToString(), "--debug",
self->debug_log_path_.ToString(),
// For old Azurite. We can't install the latest Azurite with
// old Node.js on old Ubuntu.
"--skipApiVersionCheck"}));
ARROW_RETURN_NOT_OK(self->server_process_->Execute());
return self;
}

Expand Down
95 changes: 34 additions & 61 deletions cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,6 @@
// specific language governing permissions and limitations
// under the License.

#include <algorithm> // Missing include in boost/process

#define BOOST_NO_CXX98_FUNCTION_BASE // ARROW-17805
// This boost/asio/io_context.hpp include is needless for no MinGW
// build.
//
// This is for including boost/asio/detail/socket_types.hpp before any
// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't
// work if windows.h is already included. boost/process.h ->
// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp
// includes windows.h. boost/process/args.hpp is included before
// boost/process/async.h that includes
// boost/asio/detail/socket_types.hpp implicitly is included.
#include <boost/asio/io_context.hpp>
// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
#include <boost/process.hpp>
#include <boost/thread.hpp>

#include "arrow/filesystem/gcsfs.h"

#include <absl/time/time.h>
Expand All @@ -45,16 +25,15 @@
#include <google/cloud/storage/options.h>
#include <gtest/gtest.h>

#include <array>
#include <random>
#include <string>
#include <thread>

#include "arrow/filesystem/gcsfs_internal.h"
#include "arrow/filesystem/path_util.h"
#include "arrow/filesystem/test_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/matchers.h"
#include "arrow/testing/process.h"
#include "arrow/testing/util.h"
#include "arrow/util/future.h"
#include "arrow/util/key_value_metadata.h"
Expand All @@ -64,7 +43,6 @@ namespace fs {

namespace {

namespace bp = boost::process;
namespace gc = google::cloud;
namespace gcs = google::cloud::storage;

Expand Down Expand Up @@ -99,60 +77,55 @@ class GcsTestbench : public ::testing::Environment {
"Could not start GCS emulator."
" Used the following list of python interpreter names:");
for (const auto& interpreter : names) {
auto exe_path = bp::search_path(interpreter);
auto server_process = std::make_unique<util::Process>();
error += " " + interpreter;
if (exe_path.empty()) {
auto status = server_process->SetExecutable(interpreter);
if (!status.ok()) {
error += " (exe not found)";
continue;
}

bp::ipstream output;
server_process_ = bp::child(exe_path, "-m", "testbench", "--port", port_, group_,
bp::std_err > output);
// Wait for message: "* Restarting with"
auto testbench_is_running = [&output, this](bp::child& process) {
std::string line;
std::chrono::time_point<std::chrono::steady_clock> end =
std::chrono::steady_clock::now() + std::chrono::seconds(10);
while (server_process_.valid() && server_process_.running() &&
std::chrono::steady_clock::now() < end) {
if (output.peek() && std::getline(output, line)) {
std::cerr << line << std::endl;
if (line.find("* Restarting with") != std::string::npos) return true;
} else {
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
}
return false;
};

if (testbench_is_running(server_process_)) break;
error += " (failed to start)";
server_process_.terminate();
server_process_.wait();
status = server_process->SetArgs({"-m", "testbench", "--port", port_});
if (!status.ok()) {
error += " (failed to set args: ";
error += status.ToString();
error += ")";
continue;
}

status = server_process->SetReadyErrorMessage("* Restarting with");
if (!status.ok()) {
error += " (failed to set ready error message: ";
error += status.ToString();
error += ")";
continue;
}

status = server_process->Execute();
if (!status.ok()) {
error += " (failed to launch: ";
error += status.ToString();
error += ")";
continue;
}

server_process_ = std::move(server_process);
break;
}
if (server_process_.valid() && server_process_.valid()) return;
if (server_process_) return;
error_ = std::move(error);
}

bool running() { return server_process_.running(); }
bool running() { return server_process_ && server_process_->IsRunning(); }

~GcsTestbench() override {
// Brutal shutdown, kill the full process group because the GCS testbench may launch
// additional children.
group_.terminate();
if (server_process_.valid()) {
server_process_.wait();
}
}
~GcsTestbench() override { server_process_ = nullptr; }

const std::string& port() const { return port_; }
const std::string& error() const { return error_; }

private:
std::string port_;
bp::child server_process_;
bp::group group_;
std::unique_ptr<util::Process> server_process_;
std::string error_;
};

Expand Down
Loading

0 comments on commit 97186d3

Please sign in to comment.