diff --git a/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10.ok b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10.ok new file mode 100644 index 00000000000..e69de29bb2d diff --git a/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/__fragment_metadata.tdb b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/__fragment_metadata.tdb new file mode 100644 index 00000000000..ab71eefd5b5 Binary files /dev/null and b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/__fragment_metadata.tdb differ diff --git a/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/a0.tdb b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/a0.tdb new file mode 100644 index 00000000000..fed5d540e2d Binary files /dev/null and b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/a0.tdb differ diff --git a/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0.tdb b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0.tdb new file mode 100644 index 00000000000..25786b216e6 Binary files /dev/null and b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0.tdb differ diff --git a/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0_var.tdb b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0_var.tdb new file mode 100644 index 00000000000..1b1cb4d44c5 Binary files /dev/null and b/test/inputs/arrays/zero_var_chunks_v10/__1693562063524_1693562063524_0fcdefaa40d34ff2a4b09153472fbdc2_10/d0_var.tdb differ diff --git a/test/inputs/arrays/zero_var_chunks_v10/__lock.tdb b/test/inputs/arrays/zero_var_chunks_v10/__lock.tdb new file mode 100644 index 00000000000..e69de29bb2d diff --git a/test/inputs/arrays/zero_var_chunks_v10/__schema/__1693562063520_1693562063520_87f6551825c94139b3000f0d6b7c0ac0 b/test/inputs/arrays/zero_var_chunks_v10/__schema/__1693562063520_1693562063520_87f6551825c94139b3000f0d6b7c0ac0 new file mode 100644 index 00000000000..85b879dbd0a Binary files /dev/null and b/test/inputs/arrays/zero_var_chunks_v10/__schema/__1693562063520_1693562063520_87f6551825c94139b3000f0d6b7c0ac0 differ diff --git a/test/regression/CMakeLists.txt b/test/regression/CMakeLists.txt index 53d1dbfdd46..3fa7e3c2886 100644 --- a/test/regression/CMakeLists.txt +++ b/test/regression/CMakeLists.txt @@ -46,6 +46,7 @@ if (TILEDB_CPP_API) list(APPEND SOURCES targets/sc-24079.cc) list(APPEND SOURCES targets/sc-25116.cc) list(APPEND SOURCES targets/sc-29682.cc) + list(APPEND SOURCES targets/sc-33480.cc) endif() add_executable(tiledb_regression @@ -54,6 +55,10 @@ add_executable(tiledb_regression ${SOURCES} ) +target_compile_definitions(tiledb_regression PRIVATE + -DTILEDB_TEST_INPUTS_DIR="${CMAKE_SOURCE_DIR}/test/inputs/" +) + if (NOT MSVC) target_compile_options(tiledb_regression PRIVATE -Wno-deprecated-declarations) endif() diff --git a/test/regression/targets/sc-33480.cc b/test/regression/targets/sc-33480.cc new file mode 100644 index 00000000000..b8ae2f5fa3b --- /dev/null +++ b/test/regression/targets/sc-33480.cc @@ -0,0 +1,76 @@ +#include + +#include + +using namespace tiledb; + +TEST_CASE("0 var chunks", "[unfiltering][bug][sc33480]") { + // NOTE: This regression test will not fail on a Mac M1 because on that + // platform, a division by 0 will not generate a segfault but return 0. + + // The following code created the zero_var_chunks_v10 array for this test. + // #include + // using namespace tiledb; + + // int main() { + // Context ctx; + + // Domain domain(ctx); + // domain.add_dimension( + // Dimension::create(ctx, "d", TILEDB_STRING_ASCII, nullptr, nullptr)); + // ArraySchema schema(ctx, TILEDB_SPARSE); + // schema.set_domain(domain).set_order({{TILEDB_ROW_MAJOR, + // TILEDB_ROW_MAJOR}}); schema.add_attribute(Attribute::create(ctx, + // "a")); Array::create("zero_var_chunks_v10", schema); + + // std::vector d = {}; + // std::vector d_offsets = {0}; + // std::vector data = {1}; + + // Array array(ctx, "zero_var_chunks_v10", TILEDB_WRITE); + // Query query(ctx, array, TILEDB_WRITE); + // query.set_layout(TILEDB_UNORDERED) + // .set_data_buffer("a", data) + // .set_data_buffer("d", d) + // .set_offsets_buffer("d", d_offsets); + // query.submit(); + // array.close(); + // return 0; + // } + + // This array has a var file for the "d" dimension with 0 chunks. This was + // only possible if a fragment had all empty values for a variable string in + // versions earlier than v10. + std::string array_name( + std::string(TILEDB_TEST_INPUTS_DIR) + "/arrays/zero_var_chunks_v10"); + Context ctx; + + // Prepare the array for reading. + Array array(ctx, array_name, TILEDB_READ); + + // Prepare the query. + Query query(ctx, array, TILEDB_READ); + + // Prepare the vector that will hold the result. + std::vector d(10); + std::vector d_offsets(10); + std::vector a(10); + query.set_layout(TILEDB_UNORDERED) + .set_data_buffer("d", d) + .set_offsets_buffer("d", d_offsets) + .set_data_buffer("a", a); + + // Submit the query and close the array. + query.submit(); + array.close(); + + // Validate the results. This array has one cell at coordinate '' with + // value 1. + auto res = query.result_buffer_elements(); + CHECK(res["d"].first == 1); + CHECK(res["d"].second == 0); + CHECK(res["a"].first == 0); + CHECK(res["a"].second == 1); + CHECK(d_offsets[0] == 0); + CHECK(a[0] == 1); +} diff --git a/test/src/unit-backwards_compat.cc b/test/src/unit-backwards_compat.cc index 22087eec2b7..c71561c71a3 100644 --- a/test/src/unit-backwards_compat.cc +++ b/test/src/unit-backwards_compat.cc @@ -1363,4 +1363,4 @@ TEST_CASE( vfs.remove_dir(get_fragment_dir(array_read2.uri())); vfs.remove_dir(get_commit_dir(array_read2.uri())); vfs.remove_dir(schema_folder); -} +} \ No newline at end of file diff --git a/tiledb/sm/query/readers/CMakeLists.txt b/tiledb/sm/query/readers/CMakeLists.txt index c8c4e1e6ae6..bc643fb8e21 100644 --- a/tiledb/sm/query/readers/CMakeLists.txt +++ b/tiledb/sm/query/readers/CMakeLists.txt @@ -28,3 +28,5 @@ include(common NO_POLICY_SCOPE) include(object_library) add_subdirectory(aggregators) + +add_test_subdirectory() \ No newline at end of file diff --git a/tiledb/sm/query/readers/reader_base.cc b/tiledb/sm/query/readers/reader_base.cc index 9c368af45a7..2025a2900f0 100644 --- a/tiledb/sm/query/readers/reader_base.cc +++ b/tiledb/sm/query/readers/reader_base.cc @@ -1004,19 +1004,6 @@ Status ReaderBase::unfilter_tile( return Status::Ok(); } -tuple ReaderBase::compute_chunk_min_max( - const uint64_t num_chunks, - const uint64_t num_range_threads, - const uint64_t thread_idx) const { - auto t_part_num = std::min(num_chunks, num_range_threads); - auto t_min = (thread_idx * num_chunks + t_part_num - 1) / t_part_num; - auto t_max = std::min( - ((thread_idx + 1) * num_chunks + t_part_num - 1) / t_part_num, - num_chunks); - - return {t_min, t_max}; -} - uint64_t ReaderBase::offsets_bytesize() const { return offsets_bitsize_ == 32 ? sizeof(uint32_t) : constants::cell_var_offset_size; diff --git a/tiledb/sm/query/readers/reader_base.h b/tiledb/sm/query/readers/reader_base.h index 0df91a2444e..bdb7d5f9be4 100644 --- a/tiledb/sm/query/readers/reader_base.h +++ b/tiledb/sm/query/readers/reader_base.h @@ -158,6 +158,41 @@ class ReaderBase : public StrategyBase { const std::vector>& frag_tile_domains, std::map>& result_space_tiles); + /** + * Computes the minimum and maximum indexes of tile chunks to process based on + * the available threads. + * + * @param num_chunks Total number of chunks in a tile + * @param num_range_threads Total number of range threads. + * @param range_thread_idx Current range thread index. + * @return {min, max} + */ + static tuple compute_chunk_min_max( + const uint64_t num_chunks, + const uint64_t num_range_threads, + const uint64_t thread_idx) { + if (num_range_threads == 0) { + throw std::runtime_error("Number of range thread value is 0"); + } + + if (thread_idx > num_range_threads - 1) { + throw std::runtime_error( + "Range thread index is greater than number of range threads"); + } + + if (num_chunks == 0) { + return {0, 0}; + } + + auto t_part_num = std::min(num_chunks, num_range_threads); + auto t_min = (thread_idx * num_chunks + t_part_num - 1) / t_part_num; + auto t_max = std::min( + ((thread_idx + 1) * num_chunks + t_part_num - 1) / t_part_num, + num_chunks); + + return {t_min, t_max}; + } + /* ********************************* */ /* PUBLIC METHODS */ /* ********************************* */ @@ -517,20 +552,6 @@ class ReaderBase : public StrategyBase { const ChunkData& tile_chunk_var_data, const ChunkData& tile_chunk_validity_data) const; - /** - * Computes the minimum and maximum indexes of tile chunks to process based on - * the available threads. - * - * @param num_chunks Total number of chunks in a tile - * @param num_range_threads Total number of range threads. - * @param range_thread_idx Current range thread index. - * @return {min, max} - */ - tuple compute_chunk_min_max( - const uint64_t num_chunks, - const uint64_t num_range_threads, - const uint64_t thread_idx) const; - /** * Returns the configured bytesize for var-sized attribute offsets */ diff --git a/tiledb/sm/query/readers/test/CMakeLists.txt b/tiledb/sm/query/readers/test/CMakeLists.txt new file mode 100644 index 00000000000..f3367cdaf7b --- /dev/null +++ b/tiledb/sm/query/readers/test/CMakeLists.txt @@ -0,0 +1,31 @@ +# +# tiledb/sm/query/readers/test/CMakeLists.txt +# +# The MIT License +# +# Copyright (c) 2023 TileDB, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +include(unit_test) + +commence(unit_test readers) + this_target_sources(main.cc unit_reader_base.cc) +conclude(unit_test) diff --git a/tiledb/sm/query/readers/test/compile_result_tile_main.cc b/tiledb/sm/query/readers/test/main.cc similarity index 87% rename from tiledb/sm/query/readers/test/compile_result_tile_main.cc rename to tiledb/sm/query/readers/test/main.cc index 4c5a276afe2..871aafcdd1a 100644 --- a/tiledb/sm/query/readers/test/compile_result_tile_main.cc +++ b/tiledb/sm/query/readers/test/main.cc @@ -1,5 +1,5 @@ /** - * @file compile_result_tile_main.cc + * @file tiledb/sm/query/readers/test/main.cc * * @section LICENSE * @@ -24,11 +24,11 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines a test `main()` */ -#include "../result_tile.h" - -int main() { - (void)sizeof(tiledb::sm::ResultTile); - return 0; -} \ No newline at end of file +#define CATCH_CONFIG_MAIN +#include diff --git a/tiledb/sm/query/readers/test/unit_reader_base.cc b/tiledb/sm/query/readers/test/unit_reader_base.cc new file mode 100644 index 00000000000..b68d569b580 --- /dev/null +++ b/tiledb/sm/query/readers/test/unit_reader_base.cc @@ -0,0 +1,85 @@ +/** + * @file unit_reader_base.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the `ReaderBase` class. + */ + +#include "tiledb/common/common.h" +#include "tiledb/sm/query/readers/reader_base.h" + +#include + +using namespace tiledb::sm; + +TEST_CASE( + "ReaderBase::compute_chunk_min_max valid inputs", + "[readerbase][compute_chunk_min_max][valid]") { + uint64_t num_chunks{0}; + uint64_t num_range_threads{2}; + uint64_t thread_idx{0}; + std::tuple expected_min_max{0, 0}; + + SECTION("Three chunks first thread") { + num_chunks = 3; + thread_idx = 0; + expected_min_max = {0, 2}; + } + + SECTION("Three chunks second thread") { + num_chunks = 3; + thread_idx = 1; + expected_min_max = {2, 3}; + } + + SECTION("0 chunks") { + num_chunks = 0; + thread_idx = 1; + expected_min_max = {0, 0}; + } + + auto res = ReaderBase::compute_chunk_min_max( + num_chunks, num_range_threads, thread_idx); + CHECK(res == expected_min_max); +} + +TEST_CASE( + "ReaderBase::compute_chunk_min_max invalid inputs", + "[readerbase][compute_chunk_min_max][invalid]") { + SECTION("No range threads") { + CHECK_THROWS_WITH( + ReaderBase::compute_chunk_min_max(10, 0, 0), + "Number of range thread value is 0"); + } + + SECTION("Invalid range thread index") { + CHECK_THROWS_WITH( + ReaderBase::compute_chunk_min_max(10, 1, 1), + "Range thread index is greater than number of range threads"); + } +} \ No newline at end of file