From d5265306afdf8d78f37b223ee2c6d9fbac874484 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 2 Aug 2023 11:41:47 -0400 Subject: [PATCH] Bug/update libcudf to handle arrow12 changes (#13794) Continuation of https://github.com/rapidsai/cudf/pull/13790 as more changes are needed to support Arrow 12 builds from source (both static and shared). This fixes issues when building against Arrow with S3 enabled, and corrects missing `acero` targets. https://github.com/NVIDIA/spark-rapids-jni/issues/1306 Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - David Wendt (https://github.com/davidwendt) - Gera Shegalov (https://github.com/gerashegalov) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13794 --- cpp/cmake/thirdparty/get_arrow.cmake | 28 ++++++++++++++++++++++++--- cpp/tests/io/arrow_io_source_test.cpp | 14 +++++++++++++- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 5934c8a2668..894dc9649e2 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -162,13 +162,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB rapids_cpm_find( Arrow ${VERSION} - GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared arrow_static parquet_static - arrow_dataset_static + GLOBAL_TARGETS arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_static + parquet_static arrow_acero_static arrow_dataset_static CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} GIT_SHALLOW TRUE SOURCE_SUBDIR cpp OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" + "ARROW_ACERO ON" "ARROW_IPC ON" "ARROW_DATASET ON" "ARROW_WITH_BACKTRACE ON" @@ -221,7 +222,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB # Set this to enable `find_package(Parquet)` set(Parquet_DIR "${Arrow_DIR}") endif() 
- # Set this to enable `find_package(ArrowDataset)` + # Set this to enable `find_package(ArrowDataset)`. This will call find_package(ArrowAcero) for + # us set(ArrowDataset_DIR "${Arrow_DIR}") find_package(ArrowDataset REQUIRED QUIET) endif() @@ -314,6 +316,26 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB if(ENABLE_PARQUET) + set(arrow_acero_code_string + [=[ + if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) + add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) + endif() + if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) + add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowAcero + VERSION ${VERSION} + EXPORT_SET arrow_acero_targets + GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_acero_code_string + ) + set(arrow_dataset_code_string [=[ if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) diff --git a/cpp/tests/io/arrow_io_source_test.cpp b/cpp/tests/io/arrow_io_source_test.cpp index d7f1879040b..fb9e20843ed 100644 --- a/cpp/tests/io/arrow_io_source_test.cpp +++ b/cpp/tests/io/arrow_io_source_test.cpp @@ -87,7 +87,19 @@ TEST_F(ArrowIOTest, S3FileSystem) ASSERT_EQ(1, tbl.tbl->num_columns()); // Only single column specified in reader_options ASSERT_EQ(244, tbl.tbl->num_rows()); // known number of rows from the S3 file } - CUDF_EXPECTS(arrow::fs::EnsureS3Finalized().ok(), "Failed to finalize s3 filesystem"); + if (!s3_unsupported) { + // Verify that we are using Arrow with S3, and call finalize + // https://github.com/apache/arrow/issues/36974 + // This needs to be in a separate conditional to ensure we call + // finalize after all arrow_io_source instances have been deleted. 
+ void* whole_app = dlopen(NULL, RTLD_LAZY); + decltype(arrow::fs::EnsureS3Finalized)* close_s3_func = nullptr; + + close_s3_func = reinterpret_cast<decltype(close_s3_func)>( + dlsym(whole_app, "_ZN5arrow2fs17EnsureS3FinalizedEv")); + if (close_s3_func) { CUDF_EXPECTS(close_s3_func().ok(), "Failed to finalize s3 filesystem"); } + dlclose(whole_app); + } } CUDF_TEST_PROGRAM_MAIN()