From 4c25127b57d48c7dfd3651f073743f593a804eb9 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Fri, 23 Aug 2024 21:33:48 +0300 Subject: [PATCH 1/4] Perform aggregate-only read queries on sparse arrays with unordered layout. --- tiledb/sm/query/query.cc | 34 ++++++++++++++++++++++++---------- tiledb/sm/query/query.h | 11 ++++++++++- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 22f13d6ad9f..8fd1189db3d 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -1762,7 +1762,21 @@ bool Query::is_aggregate(std::string output_field_name) const { /* PRIVATE METHODS */ /* ****************************** */ +Layout Query::effective_layout() const { + // If the user has not set a layout, it will default to row-major, which will + // use the legacy reader on sparse arrays, and fail if aggregates were + // specified. However, if only aggregates are specified and no regular data + // buffers, the layout doesn't matter and we can transparently switch to the + // much more efficient unordered layout. + if (type_ == QueryType::READ && !array_schema_->dense() && has_aggregates() && + buffers_.empty()) { + return Layout::UNORDERED; + } + return layout_; +} + Status Query::create_strategy(bool skip_checks_serialization) { + auto layout = effective_layout(); auto params = StrategyParams( resources_, array_->memory_tracker(), @@ -1775,16 +1789,16 @@ Status Query::create_strategy(bool skip_checks_serialization) { buffers_, aggregate_buffers_, subarray_, - layout_, + layout, condition_, default_channel_aggregates_, skip_checks_serialization); if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) { - if (layout_ == Layout::COL_MAJOR || layout_ == Layout::ROW_MAJOR) { + if (layout == Layout::COL_MAJOR || layout == Layout::ROW_MAJOR) { if (!array_schema_->dense()) { return Status_QueryError( "Cannot create strategy; sparse writes do not support layout " + - layout_str(layout_)); + layout_str(layout)); } strategy_ = tdb_unique_ptr(tdb_new( OrderedWriter, @@ -1795,11 +1809,11 @@ Status Query::create_strategy(bool skip_checks_serialization) { coords_info_, remote_query_, fragment_name_)); - } else if (layout_ == Layout::UNORDERED) { + } else if (layout == Layout::UNORDERED) { if (array_schema_->dense()) { return Status_QueryError( "Cannot create strategy; dense writes do not support layout " + - layout_str(layout_)); + layout_str(layout)); } strategy_ = tdb_unique_ptr(tdb_new( UnorderedWriter, @@ -1811,7 +1825,7 @@ Status Query::create_strategy(bool skip_checks_serialization) { written_buffers_, remote_query_, fragment_name_)); - } else if (layout_ == Layout::GLOBAL_ORDER) { + } else if (layout == Layout::GLOBAL_ORDER) { strategy_ = tdb_unique_ptr(tdb_new( GlobalOrderWriter, stats_->create_child("Writer"), @@ -1826,7 +1840,7 @@ Status Query::create_strategy(bool skip_checks_serialization) { fragment_name_)); } else { return Status_QueryError( - "Cannot create strategy; unsupported layout " + layout_str(layout_)); + "Cannot create strategy; unsupported layout " + layout_str(layout)); } } else if (type_ == QueryType::READ) { bool all_dense = true; @@ -1854,7 +1868,7 @@ Status Query::create_strategy(bool skip_checks_serialization) { params, dimension_label_increasing_)); } else if (use_refactored_sparse_unordered_with_dups_reader( - layout_, *array_schema_)) { + layout, *array_schema_)) { if (Query::non_overlapping_ranges() || !subarray_.is_set() || subarray_.range_num() == 1) { strategy_ = tdb_unique_ptr(tdb_new( @@ -1870,9 +1884,9 @@ Status Query::create_strategy(bool skip_checks_serialization) { params)); } } else if ( - use_refactored_sparse_global_order_reader(layout_, *array_schema_) && + use_refactored_sparse_global_order_reader(layout, *array_schema_) && !array_schema_->dense() && - (layout_ == Layout::GLOBAL_ORDER || layout_ == Layout::UNORDERED)) { + (layout == Layout::GLOBAL_ORDER || layout == Layout::UNORDERED)) { // Using the reader for unordered queries to do deduplication. if (Query::non_overlapping_ranges() || !subarray_.is_set() || subarray_.range_num() == 1) { diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 8c78360a08f..fc55447d55d 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -879,7 +879,7 @@ class Query { /** * Returns true if the query has any aggregates on any channels */ - bool has_aggregates() { + bool has_aggregates() const { // We only need to check the default channel for now return !default_channel_aggregates_.empty(); } @@ -1149,6 +1149,15 @@ class Query { /* PRIVATE METHODS */ /* ********************************* */ + /** + * Return the layout that will be used to execute the query. + * + * This is usually set by the user but cen be overridden by TileDB in cases + * where the data order would not have an observable difference, like queries + * with only aggregates. + */ + Layout effective_layout() const; + /** * Create the strategy. * From d7be8423e5f8b88f5c8f8ab2724664d8eac9ee9b Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Fri, 23 Aug 2024 21:34:38 +0300 Subject: [PATCH 2/4] Add test coverage. --- test/src/test-cppapi-aggregates.cc | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/test/src/test-cppapi-aggregates.cc b/test/src/test-cppapi-aggregates.cc index e2286a87932..5136decc3e1 100644 --- a/test/src/test-cppapi-aggregates.cc +++ b/test/src/test-cppapi-aggregates.cc @@ -173,7 +173,7 @@ void CppAggregatesFx::generate_test_params() { nullable_ = GENERATE(true, false); allow_dups_ = GENERATE(true, false); set_qc_values_ = {false}; - layout_values_ = {TILEDB_UNORDERED}; + layout_values_ = {TILEDB_ROW_MAJOR, TILEDB_UNORDERED}; use_dim_values_ = {true, false}; if (nullable_ || !std::is_same::value) { use_dim_values_ = {false}; @@ -195,6 +195,11 @@ void CppAggregatesFx::run_all_combinations(std::function fn) { for (bool set_qc : set_qc_values_) { set_qc_ = set_qc; for (tiledb_layout_t layout : layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!dense_ && request_data && layout != TILEDB_UNORDERED) + continue; layout_ = layout; fn(); } @@ -1444,6 +1449,11 @@ TEST_CASE_METHOD( for (bool set_qc : set_qc_values_) { set_qc_ = set_qc; for (tiledb_layout_t layout : layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!dense_ && request_data && layout != TILEDB_UNORDERED) + continue; layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -1824,6 +1834,11 @@ TEMPLATE_LIST_TEST_CASE( for (bool set_qc : fx.set_qc_values_) { fx.set_qc_ = set_qc; for (tiledb_layout_t layout : fx.layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!fx.dense_ && request_data && layout != TILEDB_UNORDERED) + continue; fx.layout_ = layout; Query query(fx.ctx_, array, TILEDB_READ); @@ -2048,6 +2063,11 @@ TEST_CASE_METHOD( for (bool set_qc : set_qc_values_) { set_qc_ = set_qc; for (tiledb_layout_t layout : layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!dense_ && request_data && layout != TILEDB_UNORDERED) + continue; layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -2146,6 +2166,11 @@ TEST_CASE_METHOD( for (bool set_qc : set_qc_values_) { set_qc_ = set_qc; for (tiledb_layout_t layout : layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!dense_ && layout != TILEDB_UNORDERED) + continue; layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -2285,6 +2310,11 @@ TEST_CASE_METHOD( for (bool set_qc : set_qc_values_) { set_qc_ = set_qc; for (tiledb_layout_t layout : layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (layout != TILEDB_UNORDERED) + continue; layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -2365,6 +2395,11 @@ TEMPLATE_LIST_TEST_CASE_METHOD( for (bool set_qc : CppAggregatesFx::set_qc_values_) { CppAggregatesFx::set_qc_ = set_qc; for (tiledb_layout_t layout : CppAggregatesFx::layout_values_) { + // Filter invalid combination. The legacy reader does not support + // aggregates, and we cannot automatically switch to unordered + // reads if we are requesting both the aggregates and the data. + if (!CppAggregatesFx::dense_ && layout != TILEDB_UNORDERED) + continue; CppAggregatesFx::layout_ = layout; Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); From adeaf377663b1868db1b35b1bce2022b59be8119 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 2 Sep 2024 14:18:02 +0300 Subject: [PATCH 3/4] Update tests; remove redundant `!dense` condition in test case filtering. --- test/src/test-cppapi-aggregates.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/src/test-cppapi-aggregates.cc b/test/src/test-cppapi-aggregates.cc index 5136decc3e1..45925e1c8b4 100644 --- a/test/src/test-cppapi-aggregates.cc +++ b/test/src/test-cppapi-aggregates.cc @@ -198,8 +198,9 @@ void CppAggregatesFx::run_all_combinations(std::function fn) { // Filter invalid combination. The legacy reader does not support // aggregates, and we cannot automatically switch to unordered // reads if we are requesting both the aggregates and the data. - if (!dense_ && request_data && layout != TILEDB_UNORDERED) + if (request_data && layout != TILEDB_UNORDERED) { continue; + } layout_ = layout; fn(); } @@ -1452,8 +1453,9 @@ TEST_CASE_METHOD( // Filter invalid combination. The legacy reader does not support // aggregates, and we cannot automatically switch to unordered // reads if we are requesting both the aggregates and the data. - if (!dense_ && request_data && layout != TILEDB_UNORDERED) + if (request_data && layout != TILEDB_UNORDERED) { continue; + } layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -2066,8 +2068,9 @@ TEST_CASE_METHOD( // Filter invalid combination. The legacy reader does not support // aggregates, and we cannot automatically switch to unordered // reads if we are requesting both the aggregates and the data. - if (!dense_ && request_data && layout != TILEDB_UNORDERED) + if (request_data && layout != TILEDB_UNORDERED) { continue; + } layout_ = layout; Query query(ctx_, array, TILEDB_READ); @@ -2169,8 +2172,9 @@ TEST_CASE_METHOD( // Filter invalid combination. The legacy reader does not support // aggregates, and we cannot automatically switch to unordered // reads if we are requesting both the aggregates and the data. - if (!dense_ && layout != TILEDB_UNORDERED) + if (layout != TILEDB_UNORDERED) { continue; + } layout_ = layout; Query query(ctx_, array, TILEDB_READ); From 3d13b8dba72d24d0cbb314041f02c3694e5dec74 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 2 Sep 2024 14:18:48 +0300 Subject: [PATCH 4/4] Fix typo. --- tiledb/sm/query/query.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index fc55447d55d..5d39ed133cf 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -1152,7 +1152,7 @@ class Query { /** * Return the layout that will be used to execute the query. * - * This is usually set by the user but cen be overridden by TileDB in cases + * This is usually set by the user but can be overridden by TileDB in cases * where the data order would not have an observable difference, like queries * with only aggregates. */