Skip to content

Commit

Permalink
[Backport release-2.27] Bug fix: consolidation failure on dense array…
Browse files Browse the repository at this point in the history
…s without respecting current domain (#5390) (#5397)

Backport of #5390 to release-2.27

---
TYPE: BUG
DESC: Bug fix: consolidation failure on dense arrays without respecting current domain
---------

Co-authored-by: John Kerl <[email protected]>
Co-authored-by: Agisilaos Kounelis <[email protected]>
  • Loading branch information
3 people authored Dec 4, 2024
1 parent 8d581f2 commit eb370b8
Show file tree
Hide file tree
Showing 11 changed files with 262 additions and 15 deletions.
50 changes: 50 additions & 0 deletions test/src/unit-cppapi-consolidation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
*
* Consolidation tests with the C++ API.
*/
#include "tiledb/sm/cpp_api/tiledb_experimental"

#include <test/support/tdb_catch.h>
#include "test/support/src/helpers.h"
Expand Down Expand Up @@ -488,3 +489,52 @@ TEST_CASE(
if (vfs.is_dir(array_name))
vfs.remove_dir(array_name);
}

TEST_CASE(
"C++ API: Test consolidation that respects the current domain",
"[cppapi][consolidation][current_domain]") {
std::string array_name = "cppapi_consolidation_current_domain";
remove_array(array_name);

Context ctx;

Domain domain(ctx);
auto d1 = Dimension::create<int32_t>(ctx, "d1", {{0, 1000000000}}, 50);
auto d2 = Dimension::create<int32_t>(ctx, "d2", {{0, 1000000000}}, 50);
domain.add_dimensions(d1, d2);

auto a = Attribute::create<int>(ctx, "a");

ArraySchema schema(ctx, TILEDB_DENSE);
schema.set_domain(domain);
schema.add_attributes(a);

tiledb::NDRectangle ndrect(ctx, domain);
int32_t range_one[] = {0, 2};
int32_t range_two[] = {0, 3};
ndrect.set_range(0, range_one[0], range_one[1]);
ndrect.set_range(1, range_two[0], range_two[1]);

tiledb::CurrentDomain current_domain(ctx);
current_domain.set_ndrectangle(ndrect);

tiledb::ArraySchemaExperimental::set_current_domain(
ctx, schema, current_domain);

Array::create(array_name, schema);

std::vector<int> data = {
-60, 79, -8, 100, 88, -19, -100, -111, -72, -85, 58, -41};

// Write it twice so there is something to consolidate
write_array(array_name, {0, 2, 0, 3}, data);
write_array(array_name, {0, 2, 0, 3}, data);

CHECK(tiledb::test::num_fragments(array_name) == 2);
Context ctx2;
Config config;
REQUIRE_NOTHROW(Array::consolidate(ctx2, array_name, &config));
CHECK(tiledb::test::num_fragments(array_name) == 3);

remove_array(array_name);
}
2 changes: 2 additions & 0 deletions tiledb/sm/array_schema/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#include "tiledb/common/common.h"
#include "tiledb/common/pmr.h"
#include "tiledb/common/status.h"
#include "tiledb/sm/array_schema/current_domain.h"
#include "tiledb/sm/array_schema/domain.h"
#include "tiledb/sm/filesystem/uri.h"
#include "tiledb/sm/filter/filter_pipeline.h"
#include "tiledb/sm/misc/constants.h"
Expand Down
168 changes: 168 additions & 0 deletions tiledb/sm/array_schema/current_domain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,174 @@ void CurrentDomain::check_schema_sanity(
}
}

/*
* This is a templatized auxiliary for expand_to_tiles,
* dispatched on the (necessarily integral) type of a given domain slot.
*/
template <typename T>
void expand_to_tiles_aux(
CurrentDomain::dimension_size_type dimidx,
const Dimension* dimptr,
std::shared_ptr<NDRectangle> cur_dom_ndrect,
NDRange& query_ndrange) {
// No extents for string dims, etc.
if constexpr (!std::is_integral_v<T>) {
return;
}

// Find the initial lo/hi for the query range on this dimension.
auto query_slot = query_ndrange[dimidx];
auto query_slot_range = (const T*)query_slot.data();

// Find the lo/hi for the current domain on this dimension.
auto cur_dom_slot_range = (const T*)cur_dom_ndrect->get_range(dimidx).data();

// Find the lo/hi for the core domain (max domain) on this dimension.
auto dim_dom = (const T*)dimptr->domain().data();

// Find the tile extent.
auto tile_extent = *(const T*)dimptr->tile_extent().data();

// Compute tile indices: e.g. if the extent is 512 and the query lo is
// 1027, that's tile 2.
T domain_low = dim_dom[0];
auto tile_idx0 =
Dimension::tile_idx(query_slot_range[0], domain_low, tile_extent);
auto tile_idx1 =
Dimension::tile_idx(query_slot_range[1], domain_low, tile_extent);

// Round up to a multiple of the tile coords. E.g. if the query range
// starts out as (3,4) but the tile extent is 512, that will become (0,511).
T result[2];
result[0] = Dimension::tile_coord_low(tile_idx0, domain_low, tile_extent);
result[1] = Dimension::tile_coord_high(tile_idx1, domain_low, tile_extent);

/*
* Since there is a current domain, though (we assume our caller checks this),
* rounding up to a multiple of the tile extent will lead to an out-of-bounds
* read. Make the query range lo no smaller than current domain lo on this
* dimension, and make the query range hi no larger than current domain hi on
* this dimension.
*/
result[0] = std::max(result[0], cur_dom_slot_range[0]);
result[1] = std::min(result[1], cur_dom_slot_range[1]);

// Update the query range
query_slot.set_range(result, sizeof(result));
}

/* The job here is, for a given domain slot:
* Given query ranges (nominally, for dense consolidation)
* Given the core current domain (which may be empty)
* Given the core (max) domain
* Given initial query bounds
* If the current domain is empty:
* o round the query to tile boundaries
* Else:
* o round the query to tile boundaries, but not outside the current domain
*
* Example on one dim slot:
* - Say non-empty domain is (3,4)
* - Say tile extent is 512
* - Say domain is (0,99999)
* - If current domain is empty: send (3,4) to (0,511)
* - If current domain is (2, 63): send (3,4) to (2,63)
*/
void CurrentDomain::expand_to_tiles(
const Domain& domain, NDRange& query_ndrange) const {
if (query_ndrange.empty()) {
throw std::invalid_argument("Query range is empty");
}

if (this->empty()) {
domain.expand_to_tiles_when_no_current_domain(query_ndrange);
return;
}

if (this->type() != CurrentDomainType::NDRECTANGLE) {
return;
}

auto cur_dom_ndrect = this->ndrectangle();

if (query_ndrange.size() != domain.dim_num()) {
throw std::invalid_argument(
"Query range size does not match domain dimension size");
}

for (CurrentDomain::dimension_size_type dimidx = 0; dimidx < domain.dim_num();
dimidx++) {
const auto dimptr = domain.dimension_ptr(dimidx);

if (dimptr->var_size()) {
continue;
}

if (!dimptr->tile_extent()) {
continue;
}

switch (dimptr->type()) {
case Datatype::INT64:
case Datatype::DATETIME_YEAR:
case Datatype::DATETIME_MONTH:
case Datatype::DATETIME_WEEK:
case Datatype::DATETIME_DAY:
case Datatype::DATETIME_HR:
case Datatype::DATETIME_MIN:
case Datatype::DATETIME_SEC:
case Datatype::DATETIME_MS:
case Datatype::DATETIME_US:
case Datatype::DATETIME_NS:
case Datatype::DATETIME_PS:
case Datatype::DATETIME_FS:
case Datatype::DATETIME_AS:
case Datatype::TIME_HR:
case Datatype::TIME_MIN:
case Datatype::TIME_SEC:
case Datatype::TIME_MS:
case Datatype::TIME_US:
case Datatype::TIME_NS:
case Datatype::TIME_PS:
case Datatype::TIME_FS:
case Datatype::TIME_AS:
expand_to_tiles_aux<int64_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::UINT64:
expand_to_tiles_aux<uint64_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::INT32:
expand_to_tiles_aux<int32_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::UINT32:
expand_to_tiles_aux<uint32_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::INT16:
expand_to_tiles_aux<int16_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::UINT16:
expand_to_tiles_aux<uint16_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::INT8:
expand_to_tiles_aux<int8_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
case Datatype::UINT8:
expand_to_tiles_aux<uint8_t>(
dimidx, dimptr, cur_dom_ndrect, query_ndrange);
break;
default:
break;
}
}
}

} // namespace tiledb::sm

std::ostream& operator<<(
Expand Down
12 changes: 12 additions & 0 deletions tiledb/sm/array_schema/current_domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@ class CurrentDomain {
*/
void check_schema_sanity(shared_ptr<Domain> schema_domain) const;

/**
* Expands the input query domain (query_ndrange) so that it aligns with the
* boundaries of the array's regular tiles. (i.e., it maps the domain onto the
* regular tile grid) in the same way as
* Domain::expand_to_tiles_when_no_current_domain(NDRange*), but while
* respecting the current domain.
*
* @param domain The domain to be considered.
* @param query_ndrange The query domain to be expanded.
*/
void expand_to_tiles(const Domain& domain, NDRange& query_ndrange) const;

private:
/* ********************************* */
/* PRIVATE ATTRIBUTES */
Expand Down
7 changes: 5 additions & 2 deletions tiledb/sm/array_schema/domain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
*/

#include "domain.h"
#include "current_domain.h"
#include "dimension.h"
#include "domain_data_ref.h"
#include "domain_typed_data_view.h"
#include "ndrectangle.h"
#include "tiledb/common/blank.h"
#include "tiledb/common/heap_memory.h"
#include "tiledb/common/logger.h"
Expand Down Expand Up @@ -390,12 +392,13 @@ void Domain::expand_ndrange(const NDRange& r1, NDRange* r2) const {
}
}

void Domain::expand_to_tiles(NDRange* ndrange) const {
void Domain::expand_to_tiles_when_no_current_domain(
NDRange& query_ndrange) const {
for (unsigned d = 0; d < dim_num_; ++d) {
const auto dim = dimension_ptrs_[d];
// Applicable only to fixed-sized dimensions
if (!dim->var_size()) {
dim->expand_to_tile(&(*ndrange)[d]);
dim->expand_to_tile(&(query_ndrange)[d]);
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion tiledb/sm/array_schema/domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class Buffer;
class ConstBuffer;
class Dimension;
class DomainTypedDataView;
class CurrentDomain;
class NDRectangle;
class FilterPipeline;
class MemoryTracker;
enum class Datatype : uint8_t;
Expand Down Expand Up @@ -277,8 +279,10 @@ class Domain {
* the array's regular tiles (i.e., it maps it on the regular tile grid).
* If the array has no regular tile grid or real domain, the function
* does not do anything.
*
* @param query_ndrange The query domain to be expanded.
*/
void expand_to_tiles(NDRange* ndrange) const;
void expand_to_tiles_when_no_current_domain(NDRange& query_ndrange) const;

/**
* Retrieves the tile coordinates of the input cell coordinates.
Expand Down
6 changes: 4 additions & 2 deletions tiledb/sm/consolidator/fragment_consolidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,8 @@ Status FragmentConsolidator::consolidate_fragments(

// Expand domain to full tiles
auto expanded_union_non_empty_domains = union_non_empty_domains;
domain.expand_to_tiles(&expanded_union_non_empty_domains);
array_for_reads->array_schema_latest().current_domain().expand_to_tiles(
domain, expanded_union_non_empty_domains);

// Now iterate all fragments and see if the consolidation can lead to data
// loss
Expand Down Expand Up @@ -756,7 +757,8 @@ Status FragmentConsolidator::create_queries(
if (dense) {
NDRange read_subarray = subarray;
auto& domain{array_for_reads->array_schema_latest().domain()};
domain.expand_to_tiles(&read_subarray);
array_for_reads->array_schema_latest().current_domain().expand_to_tiles(
domain, read_subarray);
throw_if_not_ok(query_r->set_subarray_unsafe(read_subarray));
}

Expand Down
12 changes: 8 additions & 4 deletions tiledb/sm/fragment/fragment_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -912,8 +912,10 @@ Status FragmentInfo::load(const ArrayDirectory& array_dir) {

// compute expanded non-empty domain (only for dense fragments)
auto expanded_non_empty_domain = non_empty_domain;
if (!sparse)
array_schema->domain().expand_to_tiles(&expanded_non_empty_domain);
if (!sparse) {
array_schema->current_domain().expand_to_tiles(
array_schema->domain(), expanded_non_empty_domain);
}

// Push new fragment info
single_fragment_info_vec_.emplace_back(SingleFragmentInfo(
Expand Down Expand Up @@ -1154,8 +1156,10 @@ tuple<Status, optional<SingleFragmentInfo>> FragmentInfo::load(
// (only for dense fragments)
const auto& non_empty_domain = meta->non_empty_domain();
auto expanded_non_empty_domain = non_empty_domain;
if (!sparse)
meta->array_schema()->domain().expand_to_tiles(&expanded_non_empty_domain);
if (!sparse) {
meta->array_schema()->current_domain().expand_to_tiles(
meta->array_schema()->domain(), expanded_non_empty_domain);
}

// Set fragment info
ret = SingleFragmentInfo(
Expand Down
9 changes: 5 additions & 4 deletions tiledb/sm/fragment/fragment_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ void FragmentMetadata::init_domain(const NDRange& non_empty_domain) {

// Set expanded domain
domain_ = non_empty_domain_;
domain.expand_to_tiles(&domain_);
array_schema_->current_domain().expand_to_tiles(domain, domain_);
}
}

Expand Down Expand Up @@ -2116,7 +2116,7 @@ void FragmentMetadata::load_non_empty_domain_v1_v2(Deserializer& deserializer) {
// Get expanded domain
if (!non_empty_domain_.empty()) {
domain_ = non_empty_domain_;
array_schema_->domain().expand_to_tiles(&domain_);
array_schema_->domain().expand_to_tiles_when_no_current_domain(domain_);
}
}

Expand Down Expand Up @@ -2150,7 +2150,7 @@ void FragmentMetadata::load_non_empty_domain_v3_v4(Deserializer& deserializer) {
// Get expanded domain
if (!non_empty_domain_.empty()) {
domain_ = non_empty_domain_;
array_schema_->domain().expand_to_tiles(&domain_);
array_schema_->domain().expand_to_tiles_when_no_current_domain(domain_);
}
}

Expand All @@ -2173,7 +2173,8 @@ void FragmentMetadata::load_non_empty_domain_v5_or_higher(
// Get expanded domain
if (!non_empty_domain_.empty()) {
domain_ = non_empty_domain_;
array_schema_->domain().expand_to_tiles(&domain_);
array_schema_->current_domain().expand_to_tiles(
array_schema_->domain(), domain_);
}
}

Expand Down
Loading

0 comments on commit eb370b8

Please sign in to comment.