Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use unordered layout for aggregate-only read queries on sparse arrays. #5255

Merged
merged 4 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion test/src/test-cppapi-aggregates.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ void CppAggregatesFx<T>::generate_test_params() {
nullable_ = GENERATE(true, false);
allow_dups_ = GENERATE(true, false);
set_qc_values_ = {false};
layout_values_ = {TILEDB_UNORDERED};
layout_values_ = {TILEDB_ROW_MAJOR, TILEDB_UNORDERED};
use_dim_values_ = {true, false};
if (nullable_ || !std::is_same<T, uint64_t>::value) {
use_dim_values_ = {false};
Expand All @@ -195,6 +195,12 @@ void CppAggregatesFx<T>::run_all_combinations(std::function<void()> fn) {
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
fn();
}
Expand Down Expand Up @@ -1444,6 +1450,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -1824,6 +1836,11 @@ TEMPLATE_LIST_TEST_CASE(
for (bool set_qc : fx.set_qc_values_) {
fx.set_qc_ = set_qc;
for (tiledb_layout_t layout : fx.layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (!fx.dense_ && request_data && layout != TILEDB_UNORDERED)
continue;
fx.layout_ = layout;
Query query(fx.ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2048,6 +2065,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2146,6 +2169,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2285,6 +2314,11 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (layout != TILEDB_UNORDERED)
continue;
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2365,6 +2399,11 @@ TEMPLATE_LIST_TEST_CASE_METHOD(
for (bool set_qc : CppAggregatesFx<T>::set_qc_values_) {
CppAggregatesFx<T>::set_qc_ = set_qc;
for (tiledb_layout_t layout : CppAggregatesFx<T>::layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (!CppAggregatesFx<T>::dense_ && layout != TILEDB_UNORDERED)
continue;
CppAggregatesFx<T>::layout_ = layout;
Query query(CppAggregatesFx<T>::ctx_, array, TILEDB_READ);

Expand Down
34 changes: 24 additions & 10 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1762,7 +1762,21 @@ bool Query::is_aggregate(std::string output_field_name) const {
/* PRIVATE METHODS */
/* ****************************** */

Layout Query::effective_layout() const {
// If the user has not set a layout, it will default to row-major, which will
// use the legacy reader on sparse arrays, and fail if aggregates were
// specified. However, if only aggregates are specified and no regular data
// buffers, the layout doesn't matter and we can transparently switch to the
// much more efficient unordered layout.
if (type_ == QueryType::READ && !array_schema_->dense() && has_aggregates() &&
buffers_.empty()) {
return Layout::UNORDERED;
}
return layout_;
}

Status Query::create_strategy(bool skip_checks_serialization) {
auto layout = effective_layout();
auto params = StrategyParams(
resources_,
array_->memory_tracker(),
Expand All @@ -1775,16 +1789,16 @@ Status Query::create_strategy(bool skip_checks_serialization) {
buffers_,
aggregate_buffers_,
subarray_,
layout_,
layout,
condition_,
default_channel_aggregates_,
skip_checks_serialization);
if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) {
if (layout_ == Layout::COL_MAJOR || layout_ == Layout::ROW_MAJOR) {
if (layout == Layout::COL_MAJOR || layout == Layout::ROW_MAJOR) {
if (!array_schema_->dense()) {
return Status_QueryError(
"Cannot create strategy; sparse writes do not support layout " +
layout_str(layout_));
layout_str(layout));
}
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
OrderedWriter,
Expand All @@ -1795,11 +1809,11 @@ Status Query::create_strategy(bool skip_checks_serialization) {
coords_info_,
remote_query_,
fragment_name_));
} else if (layout_ == Layout::UNORDERED) {
} else if (layout == Layout::UNORDERED) {
if (array_schema_->dense()) {
return Status_QueryError(
"Cannot create strategy; dense writes do not support layout " +
layout_str(layout_));
layout_str(layout));
}
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
UnorderedWriter,
Expand All @@ -1811,7 +1825,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
written_buffers_,
remote_query_,
fragment_name_));
} else if (layout_ == Layout::GLOBAL_ORDER) {
} else if (layout == Layout::GLOBAL_ORDER) {
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
GlobalOrderWriter,
stats_->create_child("Writer"),
Expand All @@ -1826,7 +1840,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
fragment_name_));
} else {
return Status_QueryError(
"Cannot create strategy; unsupported layout " + layout_str(layout_));
"Cannot create strategy; unsupported layout " + layout_str(layout));
}
} else if (type_ == QueryType::READ) {
bool all_dense = true;
Expand Down Expand Up @@ -1854,7 +1868,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
params,
dimension_label_increasing_));
} else if (use_refactored_sparse_unordered_with_dups_reader(
layout_, *array_schema_)) {
layout, *array_schema_)) {
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
subarray_.range_num() == 1) {
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
Expand All @@ -1870,9 +1884,9 @@ Status Query::create_strategy(bool skip_checks_serialization) {
params));
}
} else if (
use_refactored_sparse_global_order_reader(layout_, *array_schema_) &&
use_refactored_sparse_global_order_reader(layout, *array_schema_) &&
!array_schema_->dense() &&
(layout_ == Layout::GLOBAL_ORDER || layout_ == Layout::UNORDERED)) {
(layout == Layout::GLOBAL_ORDER || layout == Layout::UNORDERED)) {
// Using the reader for unordered queries to do deduplication.
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
subarray_.range_num() == 1) {
Expand Down
11 changes: 10 additions & 1 deletion tiledb/sm/query/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ class Query {
/**
* Returns true if the query has any aggregates on any channels
*/
bool has_aggregates() {
bool has_aggregates() const {
// We only need to check the default channel for now
return !default_channel_aggregates_.empty();
}
Expand Down Expand Up @@ -1149,6 +1149,15 @@ class Query {
/* PRIVATE METHODS */
/* ********************************* */

/**
* Return the layout that will be used to execute the query.
*
* This is usually set by the user but can be overridden by TileDB in cases
* where the data order would not have an observable difference, like queries
* with only aggregates.
*/
Layout effective_layout() const;

/**
* Create the strategy.
*
Expand Down
Loading