Skip to content

Commit

Permalink
Bug/update libcudf to handle arrow12 changes (#13794)
Browse files Browse the repository at this point in the history
Continuation of #13790, as more changes are needed to support Arrow 12 builds from source (both static and shared). This fixes issues when building against Arrow with S3 enabled, and corrects missing `acero` targets.

NVIDIA/spark-rapids-jni#1306

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Gera Shegalov (https://github.com/gerashegalov)
  - Bradley Dice (https://github.com/bdice)

URL: #13794
  • Loading branch information
robertmaynard authored Aug 2, 2023
1 parent c412480 commit d526530
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
28 changes: 25 additions & 3 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

rapids_cpm_find(
Arrow ${VERSION}
GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared arrow_static parquet_static
arrow_dataset_static
GLOBAL_TARGETS arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_static
parquet_static arrow_acero_static arrow_dataset_static
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-${VERSION}
GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
OPTIONS "CMAKE_VERBOSE_MAKEFILE ON"
"ARROW_ACERO ON"
"ARROW_IPC ON"
"ARROW_DATASET ON"
"ARROW_WITH_BACKTRACE ON"
Expand Down Expand Up @@ -221,7 +222,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
# Set this to enable `find_package(Parquet)`
set(Parquet_DIR "${Arrow_DIR}")
endif()
# Set this to enable `find_package(ArrowDataset)`
# Set this to enable `find_package(ArrowDataset)`. This will call find_package(ArrowAcero) for
# us
set(ArrowDataset_DIR "${Arrow_DIR}")
find_package(ArrowDataset REQUIRED QUIET)
endif()
Expand Down Expand Up @@ -314,6 +316,26 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

if(ENABLE_PARQUET)

set(arrow_acero_code_string
[=[
if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared))
add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared)
endif()
if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static))
add_library(arrow_acero_static ALIAS cudf::arrow_acero_static)
endif()
]=]
)

rapids_export(
BUILD ArrowAcero
VERSION ${VERSION}
EXPORT_SET arrow_acero_targets
GLOBAL_TARGETS arrow_acero_shared arrow_acero_static
NAMESPACE cudf::
FINAL_CODE_BLOCK arrow_acero_code_string
)

set(arrow_dataset_code_string
[=[
if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared))
Expand Down
14 changes: 13 additions & 1 deletion cpp/tests/io/arrow_io_source_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,19 @@ TEST_F(ArrowIOTest, S3FileSystem)
ASSERT_EQ(1, tbl.tbl->num_columns()); // Only single column specified in reader_options
ASSERT_EQ(244, tbl.tbl->num_rows()); // known number of rows from the S3 file
}
CUDF_EXPECTS(arrow::fs::EnsureS3Finalized().ok(), "Failed to finalize s3 filesystem");
if (!s3_unsupported) {
// Verify that we are using Arrow with S3, and call finalize
// https://github.com/apache/arrow/issues/36974
// This needs to be in a separate conditional to ensure we call
// finalize after all arrow_io_source instances have been deleted.
void* whole_app = dlopen(NULL, RTLD_LAZY);
decltype(arrow::fs::EnsureS3Finalized)* close_s3_func = nullptr;

close_s3_func = reinterpret_cast<decltype(close_s3_func)>(
dlsym(whole_app, "_ZN5arrow2fs17EnsureS3FinalizedEv"));
if (close_s3_func) { CUDF_EXPECTS(close_s3_func().ok(), "Failed to finalize s3 filesystem"); }
dlclose(whole_app);
}
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit d526530

Please sign in to comment.