Skip to content

Commit

Permalink
Refactor the Arrow tests and simplify importing pybind11. (#5247)
Browse files Browse the repository at this point in the history
[SC-52967](https://app.shortcut.com/tiledb-inc/story/52967/implement-better-solution-for-pybind11-failures)

Continuation of #5238. I updated the build system to import pybind11
using regular CMake methods, and also moved the Arrow tests to a
separate executable, to be executed in the standalone tests.

Some test failures were fixed by updating the GCS emulator.

---
TYPE: NO_HISTORY
  • Loading branch information
teo-tsirpanis authored Aug 27, 2024
1 parent 07d7cad commit 7485c93
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 127 deletions.
39 changes: 11 additions & 28 deletions .github/workflows/build-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,23 @@ jobs:
include:
- environ: 'azure'
TILEDB_AZURE: ON
TILEDB_ARROW_TESTS: ON
TILEDB_SERIALIZATION: OFF
TILEDB_S3: OFF
TILEDB_WEBP: OFF
- environ: 's3'
TILEDB_S3: ON
TILEDB_ARROW_TESTS: ON
TILEDB_SERIALIZATION: OFF
TILEDB_AZURE: OFF
TILEDB_WEBP: OFF
- environ: 'gcs'
TILEDB_GCS: ON
TILEDB_ARROW_TESTS: ON
TILEDB_SERIALIZATION: OFF
TILEDB_AZURE: OFF
TILEDB_S3: OFF
TILEDB_WEBP: OFF
- environ: 'serialization'
TILEDB_S3: OFF
TILEDB_AZURE: OFF
TILEDB_ARROW_TESTS: OFF
TILEDB_SERIALIZATION: ON
TILEDB_WEBP: ON

Expand All @@ -57,7 +53,6 @@ jobs:
TILEDB_AZURE: ${{ matrix.TILEDB_AZURE }} #azure }}
TILEDB_GCS: ${{ matrix.TILEDB_GCS }} #gcs }}
TILEDB_SERIALIZATION: ${{ matrix.TILEDB_SERIALIZATION }} #serialization }}
TILEDB_ARROW_TESTS: ${{ matrix.TILEDB_ARROW_TESTS }}
TILEDB_WEBP: ${{ matrix.TILEDB_WEBP }}
TILEDB_CMAKE_BUILD_TYPE: 'Release'
VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite'
Expand Down Expand Up @@ -110,15 +105,6 @@ jobs:
#but, it seems *we* can populate our own var from our actions, and access it in file upload...
echo TDBLOCALAPPDATA=$env:LOCALAPPDATA >> "$env:GITHUB_ENV"
- name: ARROW python needs
shell: bash
if: ${{ matrix.TILEDB_ARROW_TESTS == 'ON' }}
run: |
set -e pipefail
if [[ "$TILEDB_ARROW_TESTS" == "ON" ]]; then
pip install pyarrow pybind11 numpy
fi
- name: Prepare git
run: git config --global core.autocrlf false

Expand All @@ -136,8 +122,6 @@ jobs:
uses: seanmiddleditch/gha-setup-ninja@v4
- name: Prevent vcpkg from building debug variants
run: python $env:GITHUB_WORKSPACE/scripts/ci/patch_vcpkg_triplets.py
- name: Install dependencies from pip
run: python -m pip install pybind11[global]

- name: Configure TileDB
shell: pwsh
Expand Down Expand Up @@ -169,6 +153,9 @@ jobs:
if ($env:TILEDB_TOOLS -eq "ON") {
$bootstrapOptions = $bootstrapOptions + " -EnableTools"
}
if ($env:TILEDB_ARROW_TESTS -eq "ON") {
$bootstrapOptions = $bootstrapOptions + " -EnableArrowTests"
}
$CMakeBuildType = $env:TILEDB_CMAKE_BUILD_TYPE
if ($env:TILEDB_DEBUG -eq "ON") {
$bootstrapOptions = $bootstrapOptions + " -EnableDebug"
Expand All @@ -185,10 +172,6 @@ jobs:
# if ($env:TILEDB_CI_TSAN -eq "ON") {
# $bootstrapOptions = $bootstrapOptions + " -EnableSanitizer thread -EnableDebug"
# }
# static already added above as initial default
# if ($env:TILEDB_FORCE_BUILD_DEPS" -eq "ON") {
# $bootstrapOptions = $bootstrapOptions + " -EnableBuildDeps"
# }
if ($env:TILEDB_WERROR -eq "OFF") {
$bootstrapOptions = $bootstrapOptions + " -DisableWerror"
}
Expand Down Expand Up @@ -258,7 +241,7 @@ jobs:
}
else { #using the node/npm already present in vm
Write-Host "azurite: using vm install nodejs"
#this code path avoids overhead of download/expand/install of alternate nodejs/azurite.
#this code path avoids overhead of download/expand/install of alternate nodejs/azurite.
npm install -g azurite
Write-Host "done with 'npm install -g azurite'"
$azuriteDataPath = (Join-Path $env:TEMP "azuriteData")
Expand All @@ -283,23 +266,23 @@ jobs:
Write-Host "cmds: '$cmds'"
Invoke-Expression $cmds
if ($LastExitCode -ne 0) {
Write-Host "Tests failed. tiledb_unit exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
Write-Host "Tests failed. tiledb_unit exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
}
$cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\sm\filesystem\test\unit_vfs -d=yes"
Write-Host "cmds: '$cmds'"
Invoke-Expression $cmds
if ($LastExitCode -ne 0) {
Write-Host "Tests failed. tiledb_vfs exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
Write-Host "Tests failed. tiledb_vfs exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
}
$cmds = "$env:BUILD_BUILDDIRECTORY\test\ci\test_assert.exe -d=yes"
Invoke-Expression $cmds
if ($LastExitCode -ne 0) {
Write-Host "Tests failed. test_assert exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
Write-Host "Tests failed. test_assert exit status: " $LastExitCocde
$host.SetShouldExit($LastExitCode)
}
- name: Build examples
shell: pwsh
Expand Down
19 changes: 1 addition & 18 deletions .github/workflows/ci-linux_mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ env:
CMAKE_GENERATOR: ${{ inputs.cmake_generator }}
TILEDB_CI_BACKEND: ${{ inputs.ci_backend }}
TILEDB_CI_OS: ${{ startsWith(inputs.matrix_image, 'ubuntu-') && 'Linux' || 'macOS' }}
# Installing Python does not work on manylinux.
TILEDB_ARROW_TESTS: ${{ !inputs.manylinux && 'ON' || 'OFF' }}
TILEDB_MANYLINUX: ${{ !inputs.manylinux && 'ON' || 'OFF' }}
CXX: ${{ inputs.matrix_compiler_cxx }}
CC: ${{ inputs.matrix_compiler_cc }}
Expand Down Expand Up @@ -130,26 +128,11 @@ jobs:
yum install -y redhat-lsb-core centos-release-scl devtoolset-7 perl-IPC-Cmd
echo "source /opt/rh/devtoolset-7/enable" >> ~/.bashrc
# Need this for virtualenv and arrow tests if enabled
- name: 'Install Python'
uses: actions/setup-python@v4
if: ${{ !inputs.manylinux }}
with:
python-version: '3.9'
cache: 'pip'

# This must happen after checkout, because checkout would remove the directory.
- name: Install Ninja
if: inputs.cmake_generator == 'Ninja'
uses: seanmiddleditch/gha-setup-ninja@v4

- name: 'Set up Python dependencies'
if: ${{ !inputs.manylinux }}
run: |
set -e pipefail
python -m pip install --upgrade pip virtualenv
pip install pyarrow pybind11 numpy
- name: 'Brew setup on macOS' # x-ref c8e49ba8f8b9ce
if: ${{ startsWith(matrix.os, 'macos-') == true }}
run: |
Expand Down Expand Up @@ -217,7 +200,7 @@ jobs:
./test/ci/test_assert -d yes
./test/tiledb_unit -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
./tiledb/sm/filesystem/test/unit_vfs -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
ctest -R tiledb_timing_unit | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
###################################################
Expand Down
16 changes: 15 additions & 1 deletion .github/workflows/unit-test-runs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ jobs:
- name: Install Ninja
uses: seanmiddleditch/gha-setup-ninja@v4

# Need this for virtualenv and arrow tests if enabled
- name: 'Install Python'
uses: actions/setup-python@v5
with:
python-version: '3.9'
cache: 'pip'

- name: 'Set up Python dependencies'
run: |
set -e pipefail
python -m pip install --upgrade pip virtualenv
pip install pyarrow pybind11[global] numpy
- name: Setup MSVC toolset
uses: TheMrMilchmann/setup-msvc-dev@v3
if: ${{ startsWith(matrix.os, 'windows-') }}
Expand Down Expand Up @@ -60,7 +73,8 @@ jobs:
-G Ninja \
-DCMAKE_BUILD_TYPE=Debug \
-DTILEDB_SERIALIZATION=ON \
-DTILEDB_ASSERTIONS=ON
-DTILEDB_ASSERTIONS=ON \
-DTILEDB_ARROW_TESTS=ON
# Build all unit tests
cmake --build build --target tests -j4
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,9 @@ if (TILEDB_TESTS)
add_dependencies(tests tiledb_unit)
add_dependencies(tests tiledb_regression)
add_dependencies(tests test_assert)
if(TILEDB_ARROW_TESTS)
add_dependencies(tests unit_arrow)
endif()

# C API support
add_dependencies(tests unit_capi_handle unit_capi_exception_wrapper)
Expand Down
2 changes: 1 addition & 1 deletion scripts/install-gcs-emu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ die() {
}

install_gcs(){
git clone --branch v0.36.0 --depth 1 https://github.com/googleapis/storage-testbench.git /tmp/storage-testbench
git clone --branch v0.45.0 --depth 1 https://github.com/googleapis/storage-testbench.git /tmp/storage-testbench
# Create a virtual environment and keep it active
python3 -m venv /tmp/storage-testbench-venv
source /tmp/storage-testbench-venv/bin/activate
Expand Down
2 changes: 1 addition & 1 deletion scripts/install-run-gcs-emu.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Param(

$ErrorActionPreference = "Stop"

$version = "v0.39.0"
$version = "v0.45.0"
$testbenchPath = "$env:TEMP\storage-testbench-$version"
$venvPath = "$env:TEMP\storage-testbench-venv"

Expand Down
127 changes: 49 additions & 78 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,62 +28,6 @@

find_package(Catch2 REQUIRED)

# Arrow integration test config and dependencies
# Override from environment if not set in cache
if (NOT DEFINED $CACHE{TILEDB_ARROW_TESTS})
if ($ENV{TILEDB_ARROW_TESTS})
message(STATUS "Enabling Apache Arrow integration test")
# Technically this would be an "option", but we want conditional override from ENV
set(TILEDB_ARROW_TESTS ON CACHE BOOL "Enable Arrow tests (requires Python with pyarrow and numpy)" FORCE)
mark_as_advanced(TILEDB_ARROW_TESTS)
endif()
endif()

if (${TILEDB_ARROW_TESTS})
# Reworked FindPython was introducted in 3.12 with features used below
if (CMAKE_VERSION VERSION_LESS "3.12")
message(FATAL_ERROR "CMake >= 3.12 is required for TileDB Arrow Tests. (found ${CMAKE_VERSION})")
endif()
# Tell CMake to check the Python registry entry last on Windows
set(Python_FIND_REGISTRY "LAST")
# Tell CMake to prefer Python from the PATH
set(Python_FIND_STRATEGY "LOCATION")
find_package(Python COMPONENTS Interpreter Development REQUIRED)
find_package(pybind11)

message(STATUS "Configuring Apache Arrow integration test with Python ${Python_VERSION} (${Python_EXECUTABLE})")

# If we can't find the pybind11 cmake config (not available in pypi yet)
# try to find with the current executable.
if (NOT ${pybind11_FOUND})
# Get the include arguments from the python executable (has "-I" compiler option)
execute_process(COMMAND ${Python_EXECUTABLE} -m pybind11 --includes
OUTPUT_VARIABLE CMD_PYBIND11_INCLUDE
RESULT_VARIABLE CMD_PYBIND11_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE)
if (${CMD_PYBIND11_RESULT})
message(FATAL_ERROR "Unable to find pybind11 via cmake or 'python3 -m pybind11 --includes'")
endif()

# Convert args to list
separate_arguments(CMD_PARSED_INCLUDES NATIVE_COMMAND ${CMD_PYBIND11_INCLUDE})
# Remove the "-I" from each include
foreach(INCL_PATH IN LISTS CMD_PARSED_INCLUDES)
string(REPLACE "-I" "" INCL_PATH ${INCL_PATH})
list(APPEND PYBIND11_INCLUDE_DIRECTORIES ${INCL_PATH})
endforeach()

file(TO_CMAKE_PATH "${Python_SITELIB}" SAFE_Python_SITELIB)
set(pybind11_FOUND TRUE CACHE BOOL "pybind11 include path found")
add_library(pybind11::embed INTERFACE IMPORTED)
target_include_directories(pybind11::embed INTERFACE ${PYBIND11_INCLUDE_DIRECTORIES})
target_link_libraries(pybind11::embed INTERFACE Python::Python)
target_compile_definitions(pybind11::embed INTERFACE -DTILEDB_PYTHON_SITELIB_PATH="${SAFE_Python_SITELIB}")
endif()
file(TO_CMAKE_PATH ${CMAKE_CURRENT_BINARY_DIR} SAFE_CURRENT_BINARY_DIR)
target_compile_definitions(pybind11::embed INTERFACE -DTILEDB_PYTHON_UNIT_PATH="${SAFE_CURRENT_BINARY_DIR}")
endif()

# Include TileDB core header directories
set(TILEDB_CORE_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
# Include the C API directory so that the C++ 'tiledb' file can directly
Expand Down Expand Up @@ -259,17 +203,15 @@ if (TILEDB_SERIALIZATION)
)
endif()

if (TILEDB_ARROW_TESTS)
list(APPEND TILEDB_UNIT_TEST_SOURCES
src/unit-arrow.cc
${CMAKE_SOURCE_DIR}/tiledb/sm/cpp_api/arrow_io_impl.h
)
endif()

if (TILEDB_VERBOSE)
add_definitions(-DTILEDB_VERBOSE)
endif()

# We want tests to continue as normal even as the API is changing,
# so don't warn for deprecations, since they'll be escalated to errors.
if (NOT MSVC)
add_compile_options(-Wno-deprecated-declarations)
endif()

# unit test executable
add_executable(
Expand All @@ -279,16 +221,8 @@ add_executable(
"src/unit.cc"
)

add_dependencies(tiledb_unit tiledb_test_support_lib)

target_compile_options(tiledb_unit PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

# We want tests to continue as normal even as the API is changing,
# so don't warn for deprecations, since they'll be escalated to errors.
if (NOT MSVC)
target_compile_options(tiledb_unit PRIVATE -Wno-deprecated-declarations)
endif()

target_include_directories(
tiledb_unit BEFORE PRIVATE
${TILEDB_CORE_INCLUDE_DIR}
Expand Down Expand Up @@ -329,13 +263,6 @@ if (TILEDB_SERIALIZATION)
target_compile_definitions(tiledb_unit PRIVATE -DTILEDB_SERIALIZATION)
endif()

if (TILEDB_ARROW_TESTS)
target_link_libraries(tiledb_unit PRIVATE pybind11::embed)

# install the python helper next to the executable for import
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/unit_arrow.py" "${CMAKE_CURRENT_BINARY_DIR}" COPYONLY)
endif()

if (TILEDB_WEBP)
target_compile_definitions(tiledb_unit PRIVATE -DTILEDB_WEBP)
find_package(ZLIB) # We need PNG to use our Zlib so that static link works correctly if applicable
Expand Down Expand Up @@ -363,6 +290,50 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Linux" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU
)
endif()

if (TILEDB_ARROW_TESTS)
# Tell CMake to check the Python registry entry last on Windows
set(Python_FIND_REGISTRY "LAST")
# Tell CMake to prefer Python from the PATH
set(Python_FIND_STRATEGY "LOCATION")
find_package(Python COMPONENTS Interpreter Development REQUIRED)
find_package(pybind11 REQUIRED)
message(STATUS "Configuring Apache Arrow integration test with Python ${Python_VERSION} (${Python_EXECUTABLE})")

add_executable(
unit_arrow EXCLUDE_FROM_ALL
$<TARGET_OBJECTS:TILEDB_CORE_OBJECTS>
src/unit-arrow.cc
${CMAKE_SOURCE_DIR}/tiledb/sm/cpp_api/arrow_io_impl.h
)

target_link_libraries(unit_arrow
PUBLIC
TILEDB_CORE_OBJECTS_ILIB
TILEDB_CORE_OBJECTS
Catch2::Catch2WithMain
pybind11::embed
tiledb_test_support_lib
configuration_definitions
)

file(TO_CMAKE_PATH ${CMAKE_CURRENT_BINARY_DIR} SAFE_CURRENT_BINARY_DIR)
target_compile_definitions(unit_arrow PRIVATE -DTILEDB_PYTHON_UNIT_PATH="${SAFE_CURRENT_BINARY_DIR}")
# install the python helper next to the executable for import
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/unit_arrow.py" "${CMAKE_CURRENT_BINARY_DIR}" COPYONLY)

target_include_directories(
unit_arrow BEFORE PRIVATE
${TILEDB_CORE_INCLUDE_DIR}
${TILEDB_EXPORT_HEADER_DIR}
)

add_test(
NAME "unit_arrow"
COMMAND unit_arrow --durations=yes
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)
endif()

# Only produce timing tests for UNIX based systems (faketime constraint)
find_library(
LIBFAKETIME
Expand Down

0 comments on commit 7485c93

Please sign in to comment.