From bce2710a65ef5570c733b1f0b4baa66ff09b982e Mon Sep 17 00:00:00 2001 From: Robert Bindar Date: Mon, 9 Sep 2024 14:04:01 +0300 Subject: [PATCH] split loaded metadata work --- test/src/unit-tile-metadata.cc | 99 +- tiledb/sm/array/array.cc | 1 + tiledb/sm/fragment/fragment_metadata.cc | 1235 ++++------------- tiledb/sm/fragment/fragment_metadata.h | 345 +++-- .../sm/fragment/loaded_fragment_metadata.cc | 691 +++++++++ tiledb/sm/fragment/loaded_fragment_metadata.h | 88 ++ tiledb/sm/query/legacy/reader.cc | 3 +- .../query/readers/attribute_order_validator.h | 61 +- tiledb/sm/query/readers/dense_reader.cc | 3 +- .../query/readers/ordered_dim_label_reader.cc | 40 +- .../readers/sparse_global_order_reader.cc | 5 +- .../query/readers/sparse_index_reader_base.cc | 3 +- .../sparse_unordered_with_dups_reader.cc | 5 +- .../sm/query/writers/global_order_writer.cc | 42 +- tiledb/sm/query/writers/ordered_writer.cc | 60 +- tiledb/sm/query/writers/unordered_writer.cc | 19 +- tiledb/sm/query/writers/writer_base.cc | 77 +- tools/src/commands/info_command.cc | 4 +- 18 files changed, 1593 insertions(+), 1188 deletions(-) diff --git a/test/src/unit-tile-metadata.cc b/test/src/unit-tile-metadata.cc index 94b81b1470b..6690dcf704e 100644 --- a/test/src/unit-tile-metadata.cc +++ b/test/src/unit-tile-metadata.cc @@ -332,13 +332,15 @@ struct CPPFixedTileMetadataFx { // Validate no min. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_min_as("d", tile_idx), + frag_meta[f]->loaded_metadata()->get_tile_min_as( + "d", tile_idx), "FragmentMetadata: Trying to access tile min metadata that's not " "present"); // Validate no max. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_max_as("d", tile_idx), + frag_meta[f]->loaded_metadata()->get_tile_max_as( + "d", tile_idx), "FragmentMetadata: Trying to access tile max metadata that's not " "present"); @@ -348,7 +350,8 @@ struct CPPFixedTileMetadataFx { CHECK(*(int64_t*)sum == correct_sum); // Validate the tile metadata structure. 
- auto full_tile_data = frag_meta[f]->get_tile_metadata("d", tile_idx); + auto full_tile_data = + frag_meta[f]->loaded_metadata()->get_tile_metadata("d", tile_idx); CHECK(correct_min == full_tile_data.min_as()); CHECK(correct_max == full_tile_data.max_as()); CHECK(correct_sum == full_tile_data.sum_as()); @@ -463,13 +466,13 @@ struct CPPFixedTileMetadataFx { if constexpr (std::is_same::value) { // Validate no min. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_min_as("a", 0), + frag_meta[f]->loaded_metadata()->get_tile_min_as("a", 0), "FragmentMetadata: Trying to access tile min metadata that's not " "present"); // Validate no max. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_max_as("a", 0), + frag_meta[f]->loaded_metadata()->get_tile_max_as("a", 0), "FragmentMetadata: Trying to access tile max metadata that's not " "present"); @@ -484,7 +487,9 @@ struct CPPFixedTileMetadataFx { for (uint64_t tile_idx = 0; tile_idx < num_tiles_; tile_idx++) { // Validate min. const auto min = - frag_meta[f]->get_tile_min_as("a", tile_idx); + frag_meta[f] + ->loaded_metadata() + ->get_tile_min_as("a", tile_idx); CHECK(min.size() == cell_val_num); // For strings, the index is stored in a signed value, switch to @@ -498,7 +503,9 @@ struct CPPFixedTileMetadataFx { // Validate max. const auto max = - frag_meta[f]->get_tile_max_as("a", tile_idx); + frag_meta[f] + ->loaded_metadata() + ->get_tile_max_as("a", tile_idx); CHECK(max.size() == cell_val_num); // For strings, the index is stored in a signed value, switch to @@ -519,7 +526,8 @@ struct CPPFixedTileMetadataFx { // Validate the tile metadata structure. auto full_tile_data = - frag_meta[f]->get_tile_metadata("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_metadata( + "a", tile_idx); CHECK( string_ascii_[min_idx] == full_tile_data.min_as()); @@ -532,7 +540,8 @@ struct CPPFixedTileMetadataFx { for (uint64_t tile_idx = 0; tile_idx < num_tiles_; tile_idx++) { // Validate min. 
const auto min = - frag_meta[f]->get_tile_min_as("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_min_as( + "a", tile_idx); CHECK( 0 == memcmp( @@ -540,7 +549,8 @@ struct CPPFixedTileMetadataFx { // Validate max. const auto max = - frag_meta[f]->get_tile_max_as("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_max_as( + "a", tile_idx); CHECK( 0 == memcmp( @@ -548,7 +558,8 @@ struct CPPFixedTileMetadataFx { // Validate the tile metadata structure. auto full_tile_data = - frag_meta[f]->get_tile_metadata("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_metadata( + "a", tile_idx); CHECK( correct_tile_mins_[f][tile_idx] == full_tile_data.min_as()); @@ -594,7 +605,8 @@ struct CPPFixedTileMetadataFx { if constexpr (!std::is_same::value) { // Validate the full tile data structure for null count for (uint64_t tile_idx = 0; tile_idx < num_tiles_; tile_idx++) { - auto full_tile_data = frag_meta[f]->get_tile_metadata("a", tile_idx); + auto full_tile_data = + frag_meta[f]->loaded_metadata()->get_tile_metadata("a", tile_idx); if (nullable) { CHECK( full_tile_data.null_count() == @@ -883,13 +895,15 @@ struct CPPVarTileMetadataFx { // Validate no min. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_min_as("d", tile_idx), + frag_meta[f]->loaded_metadata()->get_tile_min_as( + "d", tile_idx), "FragmentMetadata: Trying to access tile min metadata that's not " "present"); // Validate no max. CHECK_THROWS_WITH( - frag_meta[f]->get_tile_max_as("d", tile_idx), + frag_meta[f]->loaded_metadata()->get_tile_max_as( + "d", tile_idx), "FragmentMetadata: Trying to access tile max metadata that's not " "present"); @@ -899,7 +913,8 @@ struct CPPVarTileMetadataFx { CHECK(*(int64_t*)sum == correct_sum); // Validate the tile metadata structure. 
- auto full_tile_data = frag_meta[f]->get_tile_metadata("d", tile_idx); + auto full_tile_data = + frag_meta[f]->loaded_metadata()->get_tile_metadata("d", tile_idx); CHECK(correct_min == full_tile_data.min_as()); CHECK(correct_max == full_tile_data.max_as()); CHECK(correct_sum == full_tile_data.sum_as()); @@ -962,7 +977,8 @@ struct CPPVarTileMetadataFx { for (uint64_t tile_idx = 0; tile_idx < num_tiles_; tile_idx++) { // Validate min. const auto min = - frag_meta[f]->get_tile_min_as("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_min_as( + "a", tile_idx); int min_idx = correct_tile_mins_[f][tile_idx]; CHECK(min.size() == strings_[min_idx].size()); CHECK( @@ -973,7 +989,8 @@ struct CPPVarTileMetadataFx { // Validate max. const auto max = - frag_meta[f]->get_tile_max_as("a", tile_idx); + frag_meta[f]->loaded_metadata()->get_tile_max_as( + "a", tile_idx); int max_idx = correct_tile_maxs_[f][tile_idx]; CHECK(max.size() == strings_[max_idx].size()); CHECK( @@ -989,7 +1006,8 @@ struct CPPVarTileMetadataFx { "present"); // Validate the tile metadata structure. - auto full_tile_data = frag_meta[f]->get_tile_metadata("a", tile_idx); + auto full_tile_data = + frag_meta[f]->loaded_metadata()->get_tile_metadata("a", tile_idx); CHECK(strings_[min_idx] == full_tile_data.min_as()); CHECK(strings_[max_idx] == full_tile_data.max_as()); } @@ -1208,11 +1226,13 @@ struct CPPFixedTileMetadataPartialFx { // Validate attribute metadta. for (uint64_t tile_idx = 0; tile_idx < 4; tile_idx++) { // Validate min. - const auto min = frag_meta[0]->get_tile_min_as("a", tile_idx); + const auto min = frag_meta[0]->loaded_metadata()->get_tile_min_as( + "a", tile_idx); CHECK(0 == memcmp(&min, &correct_tile_mins[tile_idx], sizeof(double))); // Validate max. 
- const auto max = frag_meta[0]->get_tile_max_as("a", tile_idx); + const auto max = frag_meta[0]->loaded_metadata()->get_tile_max_as( + "a", tile_idx); CHECK(0 == memcmp(&max, &correct_tile_maxs[tile_idx], sizeof(double))); // Validate sum. @@ -1220,7 +1240,8 @@ struct CPPFixedTileMetadataPartialFx { CHECK(*(double*)sum - correct_tile_sums[tile_idx] < 0.0001); // Validate the tile metadata structure. - auto full_tile_data = frag_meta[0]->get_tile_metadata("a", tile_idx); + auto full_tile_data = + frag_meta[0]->loaded_metadata()->get_tile_metadata("a", tile_idx); CHECK(correct_tile_mins[tile_idx] == full_tile_data.min_as()); CHECK(correct_tile_maxs[tile_idx] == full_tile_data.max_as()); CHECK( @@ -1381,7 +1402,8 @@ struct CPPVarTileMetadataPartialFx { for (uint64_t tile_idx = 0; tile_idx < 4; tile_idx++) { // Validate min. const auto min = - frag_meta[0]->get_tile_min_as("a", tile_idx); + frag_meta[0]->loaded_metadata()->get_tile_min_as( + "a", tile_idx); CHECK(min.size() == correct_tile_mins[tile_idx].size()); CHECK( 0 == @@ -1389,14 +1411,16 @@ struct CPPVarTileMetadataPartialFx { // Validate max. const auto max = - frag_meta[0]->get_tile_max_as("a", tile_idx); + frag_meta[0]->loaded_metadata()->get_tile_max_as( + "a", tile_idx); CHECK(max.size() == correct_tile_maxs[tile_idx].size()); CHECK( 0 == memcmp(max.data(), correct_tile_maxs[tile_idx].data(), max.size())); // Validate the tile metadata structure. - auto full_tile_data = frag_meta[0]->get_tile_metadata("a", tile_idx); + auto full_tile_data = + frag_meta[0]->loaded_metadata()->get_tile_metadata("a", tile_idx); CHECK( correct_tile_mins[tile_idx] == full_tile_data.min_as()); @@ -1562,24 +1586,30 @@ struct CPPTileMetadataStringDimFx { enc_key, names); // Validate min. 
- CHECK(frag_meta[0]->get_tile_min_as("a", 0) == 4); + CHECK( + frag_meta[0]->loaded_metadata()->get_tile_min_as("a", 0) == 4); CHECK_THROWS_WITH( - frag_meta[0]->get_tile_min_as("d1", 0), + frag_meta[0]->loaded_metadata()->get_tile_min_as( + "d1", 0), "FragmentMetadata: Trying to access tile min metadata that's not " "present"); CHECK_THROWS_WITH( - frag_meta[0]->get_tile_min_as("d2", 0), + frag_meta[0]->loaded_metadata()->get_tile_min_as( + "d2", 0), "FragmentMetadata: Trying to access tile min metadata that's not " "present"); // Validate max. - CHECK(frag_meta[0]->get_tile_max_as("a", 0) == 7); + CHECK( + frag_meta[0]->loaded_metadata()->get_tile_max_as("a", 0) == 7); CHECK_THROWS_WITH( - frag_meta[0]->get_tile_max_as("d1", 0), + frag_meta[0]->loaded_metadata()->get_tile_max_as( + "d1", 0), "FragmentMetadata: Trying to access tile max metadata that's not " "present"); CHECK_THROWS_WITH( - frag_meta[0]->get_tile_max_as("d2", 0), + frag_meta[0]->loaded_metadata()->get_tile_max_as( + "d2", 0), "FragmentMetadata: Trying to access tile max metadata that's not " "present"); @@ -1588,16 +1618,19 @@ struct CPPTileMetadataStringDimFx { *(double*)frag_meta[0]->loaded_metadata()->get_tile_sum("a", 0) == 22); // Validate the tile metadata structure. 
- auto full_tile_data_a = frag_meta[0]->get_tile_metadata("a", 0); + auto full_tile_data_a = + frag_meta[0]->loaded_metadata()->get_tile_metadata("a", 0); CHECK(4 == full_tile_data_a.min_as()); CHECK(7 == full_tile_data_a.max_as()); CHECK(22 == full_tile_data_a.sum_as()); - auto full_tile_data_d1 = frag_meta[0]->get_tile_metadata("d1", 0); + auto full_tile_data_d1 = + frag_meta[0]->loaded_metadata()->get_tile_metadata("d1", 0); CHECK("a" == full_tile_data_d1.min_as()); CHECK("dddd" == full_tile_data_d1.max_as()); - auto full_tile_data_d2 = frag_meta[0]->get_tile_metadata("d2", 0); + auto full_tile_data_d2 = + frag_meta[0]->loaded_metadata()->get_tile_metadata("d2", 0); CHECK("a" == full_tile_data_d2.min_as()); CHECK("d" == full_tile_data_d2.max_as()); diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 65cf745fc33..543c0999ccd 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -985,6 +985,7 @@ Status Array::get_max_buffer_size( Status Array::get_max_buffer_size( const char* name, const void* subarray, + uint64_t* buffer_off_size, uint64_t* buffer_val_size) { // Check if array is open diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index 236cb1e8de4..e6a38d21c6b 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -122,193 +122,190 @@ FragmentMetadata::FragmentMetadata( /* API */ /* ****************************** */ -void FragmentMetadata::set_mbr(uint64_t tile, const NDRange& mbr) { - // For easy reference - tile += tile_index_base_; - loaded_metadata_ptr_->rtree().set_leaf(tile, mbr); - return expand_non_empty_domain(mbr); -} - void FragmentMetadata::set_tile_index_base(uint64_t tile_base) { tile_index_base_ = tile_base; } void FragmentMetadata::set_tile_offset( - const std::string& name, uint64_t tid, uint64_t step) { + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_offsets) { auto it = 
idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < loaded_metadata_ptr_->tile_offsets()[idx].size()); - loaded_metadata_ptr_->tile_offsets()[idx][tid] = file_sizes_[idx]; + assert(tid < tile_offsets[idx].size()); + tile_offsets[idx][tid] = file_sizes_[idx]; file_sizes_[idx] += step; } void FragmentMetadata::set_tile_var_offset( - const std::string& name, uint64_t tid, uint64_t step) { + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_var_offsets) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < loaded_metadata_ptr_->tile_var_offsets()[idx].size()); - loaded_metadata_ptr_->tile_var_offsets()[idx][tid] = file_var_sizes_[idx]; + assert(tid < tile_var_offsets[idx].size()); + tile_var_offsets[idx][tid] = file_var_sizes_[idx]; file_var_sizes_[idx] += step; } void FragmentMetadata::set_tile_var_size( - const std::string& name, uint64_t tid, uint64_t size) { + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_var_sizes) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < loaded_metadata_ptr_->tile_var_sizes()[idx].size()); - loaded_metadata_ptr_->tile_var_sizes()[idx][tid] = size; + assert(tid < tile_var_sizes[idx].size()); + tile_var_sizes[idx][tid] = size; } void FragmentMetadata::set_tile_validity_offset( - const std::string& name, uint64_t tid, uint64_t step) { + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_validity_offsets) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < loaded_metadata_ptr_->tile_validity_offsets()[idx].size()); - loaded_metadata_ptr_->tile_validity_offsets()[idx][tid] = - file_validity_sizes_[idx]; + assert(tid < tile_validity_offsets[idx].size()); + 
tile_validity_offsets[idx][tid] = file_validity_sizes_[idx]; file_validity_sizes_[idx] += step; } void FragmentMetadata::set_tile_min( - const std::string& name, uint64_t tid, const ByteVec& min) { + const std::string& name, + uint64_t tid, + const ByteVec& min, + tdb::pmr::vector>& tile_min_buffer) { const auto size = min.size(); auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * size; - assert(tid < loaded_metadata_ptr_->tile_min_buffer()[idx].size() / size); - memcpy( - &loaded_metadata_ptr_->tile_min_buffer()[idx][buff_offset], - min.data(), - size); + assert(tid < tile_min_buffer[idx].size() / size); + memcpy(&tile_min_buffer[idx][buff_offset], min.data(), size); } void FragmentMetadata::set_tile_min_var_size( - const std::string& name, uint64_t tid, uint64_t size) { + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_min_buffer) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * sizeof(uint64_t); - assert( - tid < - loaded_metadata_ptr_->tile_min_buffer()[idx].size() / sizeof(uint64_t)); + assert(tid < tile_min_buffer[idx].size() / sizeof(uint64_t)); - auto offset = - (uint64_t*)&loaded_metadata_ptr_->tile_min_buffer()[idx][buff_offset]; + auto offset = (uint64_t*)&tile_min_buffer[idx][buff_offset]; *offset = size; } void FragmentMetadata::set_tile_min_var( - const std::string& name, uint64_t tid, const ByteVec& min) { + const std::string& name, + uint64_t tid, + const ByteVec& min, + tdb::pmr::vector>& tile_min_buffer, + tdb::pmr::vector>& tile_min_var_buffer) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * sizeof(uint64_t); - assert( - tid < - loaded_metadata_ptr_->tile_min_buffer()[idx].size() / sizeof(uint64_t)); - - auto offset = - 
(uint64_t*)&loaded_metadata_ptr_->tile_min_buffer()[idx][buff_offset]; - auto size = - buff_offset != loaded_metadata_ptr_->tile_min_buffer()[idx].size() - - sizeof(uint64_t) ? - offset[1] - offset[0] : - loaded_metadata_ptr_->tile_min_var_buffer()[idx].size() - offset[0]; + assert(tid < tile_min_buffer[idx].size() / sizeof(uint64_t)); + + auto offset = (uint64_t*)&tile_min_buffer[idx][buff_offset]; + auto size = buff_offset != tile_min_buffer[idx].size() - sizeof(uint64_t) ? + offset[1] - offset[0] : + tile_min_var_buffer[idx].size() - offset[0]; // Copy var data if (size) { // avoid (potentially) illegal index ref's when size is zero - memcpy( - &loaded_metadata_ptr_->tile_min_var_buffer()[idx][offset[0]], - min.data(), - size); + memcpy(&tile_min_var_buffer[idx][offset[0]], min.data(), size); } } void FragmentMetadata::set_tile_max( - const std::string& name, uint64_t tid, const ByteVec& max) { + const std::string& name, + uint64_t tid, + const ByteVec& max, + tdb::pmr::vector>& tile_max_buffer) { const auto size = max.size(); auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * size; - assert(tid < loaded_metadata_ptr_->tile_max_buffer()[idx].size() / size); - memcpy( - &loaded_metadata_ptr_->tile_max_buffer()[idx][buff_offset], - max.data(), - size); + assert(tid < tile_max_buffer[idx].size() / size); + memcpy(&tile_max_buffer[idx][buff_offset], max.data(), size); } void FragmentMetadata::set_tile_max_var_size( - const std::string& name, uint64_t tid, uint64_t size) { + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_max_buffer) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * sizeof(uint64_t); - assert( - tid < - loaded_metadata_ptr_->tile_max_buffer()[idx].size() / sizeof(uint64_t)); + assert(tid < tile_max_buffer[idx].size() / sizeof(uint64_t)); - auto 
offset = - (uint64_t*)&loaded_metadata_ptr_->tile_max_buffer()[idx][buff_offset]; + auto offset = (uint64_t*)&tile_max_buffer[idx][buff_offset]; *offset = size; } void FragmentMetadata::set_tile_max_var( - const std::string& name, uint64_t tid, const ByteVec& max) { + const std::string& name, + uint64_t tid, + const ByteVec& max, + tdb::pmr::vector>& tile_max_buffer, + tdb::pmr::vector>& tile_max_var_buffer) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; auto buff_offset = tid * sizeof(uint64_t); - assert( - tid < - loaded_metadata_ptr_->tile_max_buffer()[idx].size() / sizeof(uint64_t)); - - auto offset = - (uint64_t*)&loaded_metadata_ptr_->tile_max_buffer()[idx][buff_offset]; - auto size = - buff_offset != loaded_metadata_ptr_->tile_max_buffer()[idx].size() - - sizeof(uint64_t) ? - offset[1] - offset[0] : - loaded_metadata_ptr_->tile_max_var_buffer()[idx].size() - offset[0]; + assert(tid < tile_max_buffer[idx].size() / sizeof(uint64_t)); + + auto offset = (uint64_t*)&tile_max_buffer[idx][buff_offset]; + auto size = buff_offset != tile_max_buffer[idx].size() - sizeof(uint64_t) ? + offset[1] - offset[0] : + tile_max_var_buffer[idx].size() - offset[0]; // Copy var data if (size) { // avoid (potentially) illegal index ref's when size is zero - memcpy( - &loaded_metadata_ptr_->tile_max_var_buffer()[idx][offset[0]], - max.data(), - size); + memcpy(&tile_max_var_buffer[idx][offset[0]], max.data(), size); } } void FragmentMetadata::convert_tile_min_max_var_sizes_to_offsets( - const std::string& name) { + const std::string& name, + tdb::pmr::vector>& tile_min_var_buffer, + tdb::pmr::vector>& tile_min_buffer, + tdb::pmr::vector>& tile_max_var_buffer, + tdb::pmr::vector>& tile_max_buffer) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; // Fix the min offsets. 
- uint64_t offset = loaded_metadata_ptr_->tile_min_var_buffer()[idx].size(); - auto offsets = - (uint64_t*)loaded_metadata_ptr_->tile_min_buffer()[idx].data() + - tile_index_base_; + uint64_t offset = tile_min_var_buffer[idx].size(); + auto offsets = (uint64_t*)tile_min_buffer[idx].data() + tile_index_base_; for (uint64_t i = tile_index_base_; - i < - loaded_metadata_ptr_->tile_min_buffer()[idx].size() / sizeof(uint64_t); + i < tile_min_buffer[idx].size() / sizeof(uint64_t); i++) { auto size = *offsets; *offsets = offset; @@ -317,15 +314,13 @@ void FragmentMetadata::convert_tile_min_max_var_sizes_to_offsets( } // Allocate min var data buffer. - loaded_metadata_ptr_->tile_min_var_buffer()[idx].resize(offset); + tile_min_var_buffer[idx].resize(offset); // Fix the max offsets. - offset = loaded_metadata_ptr_->tile_max_var_buffer()[idx].size(); - offsets = (uint64_t*)loaded_metadata_ptr_->tile_max_buffer()[idx].data() + - tile_index_base_; + offset = tile_max_var_buffer[idx].size(); + offsets = (uint64_t*)tile_max_buffer[idx].data() + tile_index_base_; for (uint64_t i = tile_index_base_; - i < - loaded_metadata_ptr_->tile_max_buffer()[idx].size() / sizeof(uint64_t); + i < tile_max_buffer[idx].size() / sizeof(uint64_t); i++) { auto size = *offsets; *offsets = offset; @@ -334,135 +329,33 @@ void FragmentMetadata::convert_tile_min_max_var_sizes_to_offsets( } // Allocate min var data buffer. 
- loaded_metadata_ptr_->tile_max_var_buffer()[idx].resize(offset); + tile_max_var_buffer[idx].resize(offset); } void FragmentMetadata::set_tile_sum( - const std::string& name, uint64_t tid, const ByteVec& sum) { + const std::string& name, + uint64_t tid, + const ByteVec& sum, + tdb::pmr::vector>& tile_sums) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert( - tid * sizeof(uint64_t) < loaded_metadata_ptr_->tile_sums()[idx].size()); - memcpy( - &loaded_metadata_ptr_->tile_sums()[idx][tid * sizeof(uint64_t)], - sum.data(), - sizeof(uint64_t)); + assert(tid * sizeof(uint64_t) < tile_sums[idx].size()); + memcpy(&tile_sums[idx][tid * sizeof(uint64_t)], sum.data(), sizeof(uint64_t)); } void FragmentMetadata::set_tile_null_count( - const std::string& name, uint64_t tid, uint64_t null_count) { + const std::string& name, + uint64_t tid, + uint64_t null_count, + tdb::pmr::vector>& tile_null_counts) { auto it = idx_map_.find(name); assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < loaded_metadata_ptr_->tile_null_counts()[idx].size()); - loaded_metadata_ptr_->tile_null_counts()[idx][tid] = null_count; -} - -template <> -void FragmentMetadata::compute_fragment_min_max_sum( - const std::string& name); - -void FragmentMetadata::compute_fragment_min_max_sum_null_count() { - std::vector names; - names.reserve(idx_map_.size()); - for (auto& it : idx_map_) { - names.emplace_back(it.first); - } - - // Process all attributes in parallel. - throw_if_not_ok(parallel_for( - &resources_->compute_tp(), 0, idx_map_.size(), [&](uint64_t n) { - // For easy reference. - const auto& name = names[n]; - const auto& idx = idx_map_[name]; - const auto var_size = array_schema_->var_size(name); - const auto type = array_schema_->type(name); - - // Compute null count. 
- loaded_metadata_ptr_->fragment_null_counts()[idx] = std::accumulate( - loaded_metadata_ptr_->tile_null_counts()[idx].begin(), - loaded_metadata_ptr_->tile_null_counts()[idx].end(), - 0); - - if (var_size) { - min_max_var(name); - } else { - // Switch depending on datatype. - switch (type) { - case Datatype::INT8: - compute_fragment_min_max_sum(name); - break; - case Datatype::INT16: - compute_fragment_min_max_sum(name); - break; - case Datatype::INT32: - compute_fragment_min_max_sum(name); - break; - case Datatype::INT64: - compute_fragment_min_max_sum(name); - break; - case Datatype::BOOL: - case Datatype::UINT8: - compute_fragment_min_max_sum(name); - break; - case Datatype::UINT16: - compute_fragment_min_max_sum(name); - break; - case Datatype::UINT32: - compute_fragment_min_max_sum(name); - break; - case Datatype::UINT64: - compute_fragment_min_max_sum(name); - break; - case Datatype::FLOAT32: - compute_fragment_min_max_sum(name); - break; - case Datatype::FLOAT64: - compute_fragment_min_max_sum(name); - break; - case Datatype::DATETIME_YEAR: - case Datatype::DATETIME_MONTH: - case Datatype::DATETIME_WEEK: - case Datatype::DATETIME_DAY: - case Datatype::DATETIME_HR: - case Datatype::DATETIME_MIN: - case Datatype::DATETIME_SEC: - case Datatype::DATETIME_MS: - case Datatype::DATETIME_US: - case Datatype::DATETIME_NS: - case Datatype::DATETIME_PS: - case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - case Datatype::TIME_HR: - case Datatype::TIME_MIN: - case Datatype::TIME_SEC: - case Datatype::TIME_MS: - case Datatype::TIME_US: - case Datatype::TIME_NS: - case Datatype::TIME_PS: - case Datatype::TIME_FS: - case Datatype::TIME_AS: - compute_fragment_min_max_sum(name); - break; - case Datatype::STRING_ASCII: - case Datatype::CHAR: - compute_fragment_min_max_sum(name); - break; - case Datatype::BLOB: - case Datatype::GEOM_WKB: - case Datatype::GEOM_WKT: - compute_fragment_min_max_sum(name); - break; - default: - break; - } - } - - return Status::Ok(); - 
})); + assert(tid < tile_null_counts[idx].size()); + tile_null_counts[idx][tid] = null_count; } void FragmentMetadata::set_array_schema( @@ -724,9 +617,6 @@ void FragmentMetadata::init(const NDRange& non_empty_domain) { // Set last tile cell number last_tile_cell_num_ = 0; - // Initialize tile offsets - loaded_metadata_ptr_->resize_offsets(num); - file_sizes_.resize(num); for (unsigned int i = 0; i < num; ++i) file_sizes_[i] = 0; @@ -841,7 +731,33 @@ void FragmentMetadata::load( } } -void FragmentMetadata::store(const EncryptionKey& encryption_key) { +void FragmentMetadata::store_rtree( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { + auto rtree_tile = write_rtree(loaded_metadata); + write_generic_tile_to_file(encryption_key, rtree_tile, nbytes); + resources_->stats().add_counter("write_rtree_size", *nbytes); +} + +shared_ptr FragmentMetadata::write_rtree( + shared_ptr loaded_metadata) { + loaded_metadata->rtree().build_tree(); + SizeComputationSerializer size_computation_serializer; + loaded_metadata->rtree().serialize(size_computation_serializer); + + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; + + Serializer serializer(tile->data(), tile->size()); + loaded_metadata->rtree().serialize(serializer); + + return tile; +} + +void FragmentMetadata::store( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key) { auto timer_se = resources_->stats().start_timer("write_store_frag_meta"); // Make sure the data fits in the current domain before we commit to disk. 
@@ -863,13 +779,13 @@ void FragmentMetadata::store(const EncryptionKey& encryption_key) { } try { if (version_ <= 10) { - store_v7_v10(encryption_key); + store_v7_v10(loaded_metadata, encryption_key); } else if (version_ == 11) { - store_v11(encryption_key); + store_v11(loaded_metadata, encryption_key); } else if (version_ <= 14) { - store_v12_v14(encryption_key); + store_v12_v14(loaded_metadata, encryption_key); } else { - store_v15_or_higher(encryption_key); + store_v15_or_higher(loaded_metadata, encryption_key); } return; } catch (...) { @@ -882,7 +798,9 @@ void FragmentMetadata::store(const EncryptionKey& encryption_key) { } } -void FragmentMetadata::store_v7_v10(const EncryptionKey& encryption_key) { +void FragmentMetadata::store_v7_v10( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key) { auto fragment_metadata_uri = fragment_uri_.join_path(constants::fragment_metadata_filename); auto num = num_dims_and_attrs(); @@ -890,14 +808,14 @@ void FragmentMetadata::store_v7_v10(const EncryptionKey& encryption_key) { // Store R-Tree gt_offsets_.rtree_ = offset; - store_rtree(encryption_key, &nbytes); + store_rtree(loaded_metadata, encryption_key, &nbytes); offset += nbytes; // Store tile offsets gt_offsets_.tile_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_offsets_[i] = offset; - store_tile_offsets(i, encryption_key, &nbytes); + store_tile_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -905,7 +823,7 @@ void FragmentMetadata::store_v7_v10(const EncryptionKey& encryption_key) { gt_offsets_.tile_var_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_offsets_[i] = offset; - store_tile_var_offsets(i, encryption_key, &nbytes); + store_tile_var_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -932,7 +850,9 @@ void FragmentMetadata::store_v7_v10(const EncryptionKey& encryption_key) { 
throw_if_not_ok(resources_->vfs().close_file(fragment_metadata_uri)); } -void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { +void FragmentMetadata::store_v11( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key) { auto fragment_metadata_uri = fragment_uri_.join_path(constants::fragment_metadata_filename); auto num = num_dims_and_attrs(); @@ -940,14 +860,14 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { // Store R-Tree gt_offsets_.rtree_ = offset; - store_rtree(encryption_key, &nbytes); + store_rtree(loaded_metadata, encryption_key, &nbytes); offset += nbytes; // Store tile offsets gt_offsets_.tile_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_offsets_[i] = offset; - store_tile_offsets(i, encryption_key, &nbytes); + store_tile_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -955,7 +875,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_var_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_offsets_[i] = offset; - store_tile_var_offsets(i, encryption_key, &nbytes); + store_tile_var_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -963,7 +883,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_var_sizes_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_sizes_[i] = offset; - store_tile_var_sizes(i, encryption_key, &nbytes); + store_tile_var_sizes(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -971,7 +891,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_validity_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_validity_offsets_[i] = offset; - store_tile_validity_offsets(i, encryption_key, &nbytes); + store_tile_validity_offsets(loaded_metadata, i, encryption_key, &nbytes); offset 
+= nbytes; } @@ -979,7 +899,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_min_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_min_offsets_[i] = offset; - store_tile_mins(i, encryption_key, &nbytes); + store_tile_mins(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -987,7 +907,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_max_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_max_offsets_[i] = offset; - store_tile_maxs(i, encryption_key, &nbytes); + store_tile_maxs(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -995,7 +915,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_sum_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_sum_offsets_[i] = offset; - store_tile_sums(i, encryption_key, &nbytes); + store_tile_sums(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1003,7 +923,7 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { gt_offsets_.tile_null_count_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_null_count_offsets_[i] = offset; - store_tile_null_counts(i, encryption_key, &nbytes); + store_tile_null_counts(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1014,7 +934,9 @@ void FragmentMetadata::store_v11(const EncryptionKey& encryption_key) { throw_if_not_ok(resources_->vfs().close_file(fragment_metadata_uri)); } -void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { +void FragmentMetadata::store_v12_v14( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key) { auto fragment_metadata_uri = fragment_uri_.join_path(constants::fragment_metadata_filename); auto num = num_dims_and_attrs(); @@ -1022,14 +944,15 @@ void FragmentMetadata::store_v12_v14(const 
EncryptionKey& encryption_key) { // Store R-Tree gt_offsets_.rtree_ = offset; - store_rtree(encryption_key, &nbytes); + store_rtree(loaded_metadata, encryption_key, &nbytes); offset += nbytes; // Store tile offsets gt_offsets_.tile_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_offsets_[i] = offset; - store_tile_offsets(i, encryption_key, &nbytes); + store_tile_offsets( + loaded_metadata, loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1037,7 +960,8 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_var_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_offsets_[i] = offset; - store_tile_var_offsets(i, encryption_key, &nbytes); + store_tile_var_offsets( + loaded_metadata, loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1045,7 +969,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_var_sizes_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_sizes_[i] = offset; - store_tile_var_sizes(i, encryption_key, &nbytes); + store_tile_var_sizes(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1053,7 +977,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_validity_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_validity_offsets_[i] = offset; - store_tile_validity_offsets(i, encryption_key, &nbytes); + store_tile_validity_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1061,7 +985,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_min_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_min_offsets_[i] = offset; - store_tile_mins(i, encryption_key, &nbytes); + store_tile_mins(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1069,7 
+993,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_max_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_max_offsets_[i] = offset; - store_tile_maxs(i, encryption_key, &nbytes); + store_tile_maxs(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1077,7 +1001,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_sum_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_sum_offsets_[i] = offset; - store_tile_sums(i, encryption_key, &nbytes); + store_tile_sums(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1085,13 +1009,14 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { gt_offsets_.tile_null_count_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_null_count_offsets_[i] = offset; - store_tile_null_counts(i, encryption_key, &nbytes); + store_tile_null_counts(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } // Store fragment min, max, sum and null count gt_offsets_.fragment_min_max_sum_null_count_offset_ = offset; - store_fragment_min_max_sum_null_count(num, encryption_key, &nbytes); + store_fragment_min_max_sum_null_count( + loaded_metadata, num, encryption_key, &nbytes); offset += nbytes; // Store footer @@ -1102,6 +1027,7 @@ void FragmentMetadata::store_v12_v14(const EncryptionKey& encryption_key) { } void FragmentMetadata::store_v15_or_higher( + shared_ptr loaded_metadata, const EncryptionKey& encryption_key) { auto fragment_metadata_uri = fragment_uri_.join_path(constants::fragment_metadata_filename); @@ -1110,14 +1036,15 @@ void FragmentMetadata::store_v15_or_higher( // Store R-Tree gt_offsets_.rtree_ = offset; - store_rtree(encryption_key, &nbytes); + store_rtree(loaded_metadata, encryption_key, &nbytes); offset += nbytes; // Store tile offsets gt_offsets_.tile_offsets_.resize(num); for (unsigned 
int i = 0; i < num; ++i) { gt_offsets_.tile_offsets_[i] = offset; - store_tile_offsets(i, encryption_key, &nbytes); + store_tile_offsets( + loaded_metadata, loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1125,7 +1052,8 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_var_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_offsets_[i] = offset; - store_tile_var_offsets(i, encryption_key, &nbytes); + store_tile_var_offsets( + loaded_metadata, loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1133,7 +1061,7 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_var_sizes_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_var_sizes_[i] = offset; - store_tile_var_sizes(i, encryption_key, &nbytes); + store_tile_var_sizes(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1141,7 +1069,7 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_validity_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_validity_offsets_[i] = offset; - store_tile_validity_offsets(i, encryption_key, &nbytes); + store_tile_validity_offsets(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1149,7 +1077,7 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_min_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_min_offsets_[i] = offset; - store_tile_mins(i, encryption_key, &nbytes); + store_tile_mins(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1157,7 +1085,7 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_max_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_max_offsets_[i] = offset; - store_tile_maxs(i, encryption_key, &nbytes); + store_tile_maxs(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1165,7 +1093,7 @@ void FragmentMetadata::store_v15_or_higher( 
gt_offsets_.tile_sum_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_sum_offsets_[i] = offset; - store_tile_sums(i, encryption_key, &nbytes); + store_tile_sums(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } @@ -1173,18 +1101,19 @@ void FragmentMetadata::store_v15_or_higher( gt_offsets_.tile_null_count_offsets_.resize(num); for (unsigned int i = 0; i < num; ++i) { gt_offsets_.tile_null_count_offsets_[i] = offset; - store_tile_null_counts(i, encryption_key, &nbytes); + store_tile_null_counts(loaded_metadata, i, encryption_key, &nbytes); offset += nbytes; } // Store fragment min, max, sum and null count gt_offsets_.fragment_min_max_sum_null_count_offset_ = offset; - store_fragment_min_max_sum_null_count(num, encryption_key, &nbytes); + store_fragment_min_max_sum_null_count( + loaded_metadata, num, encryption_key, &nbytes); offset += nbytes; // Store processed condition gt_offsets_.processed_conditions_offsets_ = offset; - store_processed_conditions(encryption_key, &nbytes); + store_processed_conditions(loaded_metadata, encryption_key, &nbytes); offset += nbytes; // Store footer @@ -1194,10 +1123,19 @@ void FragmentMetadata::store_v15_or_higher( throw_if_not_ok(resources_->vfs().close_file(fragment_metadata_uri)); } -void FragmentMetadata::set_num_tiles(uint64_t num_tiles) { +void FragmentMetadata::set_num_tiles( + uint64_t num_tiles, + tdb::pmr::vector>& tile_offsets, + tdb::pmr::vector>& tile_var_offsets, + tdb::pmr::vector>& tile_var_sizes, + tdb::pmr::vector>& tile_validity_offsets, + tdb::pmr::vector>& tile_min_buffer, + tdb::pmr::vector>& tile_max_buffer, + tdb::pmr::vector>& tile_sums, + tdb::pmr::vector>& tile_null_counts) { for (auto& it : idx_map_) { auto i = it.second; - assert(num_tiles >= loaded_metadata_ptr_->tile_offsets()[i].size()); + assert(num_tiles >= tile_offsets[i].size()); // Get the fixed cell size const auto is_dim = array_schema_->is_dim(it.first); @@ -1205,10 +1143,10 @@ void 
FragmentMetadata::set_num_tiles(uint64_t num_tiles) { const auto cell_size = var_size ? constants::cell_var_offset_size : array_schema_->cell_size(it.first); - loaded_metadata_ptr_->tile_offsets()[i].resize(num_tiles, 0); - loaded_metadata_ptr_->tile_var_offsets()[i].resize(num_tiles, 0); - loaded_metadata_ptr_->tile_var_sizes()[i].resize(num_tiles, 0); - loaded_metadata_ptr_->tile_validity_offsets()[i].resize(num_tiles, 0); + tile_offsets[i].resize(num_tiles, 0); + tile_var_offsets[i].resize(num_tiles, 0); + tile_var_sizes[i].resize(num_tiles, 0); + tile_validity_offsets[i].resize(num_tiles, 0); // No metadata for dense coords if (!array_schema_->dense() || !is_dim) { @@ -1217,26 +1155,22 @@ void FragmentMetadata::set_num_tiles(uint64_t num_tiles) { if (TileMetadataGenerator::has_min_max_metadata( type, is_dim, var_size, cell_val_num)) { - loaded_metadata_ptr_->tile_min_buffer()[i].resize( - num_tiles * cell_size, 0); - loaded_metadata_ptr_->tile_max_buffer()[i].resize( - num_tiles * cell_size, 0); + tile_min_buffer[i].resize(num_tiles * cell_size, 0); + tile_max_buffer[i].resize(num_tiles * cell_size, 0); } if (TileMetadataGenerator::has_sum_metadata( type, var_size, cell_val_num)) { if (!var_size) - loaded_metadata_ptr_->tile_sums()[i].resize( - num_tiles * sizeof(uint64_t), 0); + tile_sums[i].resize(num_tiles * sizeof(uint64_t), 0); } if (array_schema_->is_nullable(it.first)) - loaded_metadata_ptr_->tile_null_counts()[i].resize(num_tiles, 0); + tile_null_counts[i].resize(num_tiles, 0); } } if (!dense_) { - loaded_metadata_ptr_->rtree().set_leaf_num(num_tiles); sparse_tile_num_ = num_tiles; } } @@ -1363,14 +1297,6 @@ const std::string& FragmentMetadata::array_schema_name() const { return array_schema_name_; } -const NDRange& FragmentMetadata::mbr(uint64_t tile_idx) const { - return loaded_metadata_ptr_->rtree().leaf(tile_idx); -} - -const tdb::pmr::vector& FragmentMetadata::mbrs() const { - return loaded_metadata_ptr_->rtree().leaves(); -} - uint64_t 
FragmentMetadata::tile_size( const std::string& name, uint64_t tile_idx) const { auto var_size = array_schema_->var_size(name); @@ -1379,247 +1305,6 @@ uint64_t FragmentMetadata::tile_size( cell_num * array_schema_->cell_size(name); } -template -T FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const { - const auto var_size = array_schema_->var_size(name); - if (var_size) { - throw FragmentMetadataStatusException( - "Trying to access tile min metadata as wrong type"); - } - - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_ptr_->loaded_metadata().tile_min_[idx]) { - throw FragmentMetadataStatusException( - "Trying to access tile min metadata that's not loaded"); - } - - const auto type = array_schema_->type(name); - const auto is_dim = array_schema_->is_dim(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - if (!TileMetadataGenerator::has_min_max_metadata( - type, is_dim, var_size, cell_val_num)) { - throw FragmentMetadataStatusException( - "Trying to access tile min metadata that's not present"); - } - - auto size = array_schema_->cell_size(name); - const void* min = - &loaded_metadata_ptr_->tile_min_buffer()[idx][tile_idx * size]; - if constexpr (std::is_same_v) { - return min; - } else { - return *static_cast(min); - } -} - -template <> -std::string_view FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const { - const auto type = array_schema_->type(name); - const auto var_size = array_schema_->var_size(name); - if (!var_size && type != Datatype::STRING_ASCII && type != Datatype::CHAR) { - throw FragmentMetadataStatusException( - "Trying to access tile min metadata as wrong type"); - } - - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_ptr_->loaded_metadata().tile_min_[idx]) { - throw FragmentMetadataStatusException( - "Trying to access tile min 
metadata that's not loaded"); - } - - const auto is_dim = array_schema_->is_dim(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - if (!TileMetadataGenerator::has_min_max_metadata( - type, is_dim, var_size, cell_val_num)) { - throw FragmentMetadataStatusException( - "Trying to access tile min metadata that's not present"); - } - - using sv_size_cast = std::string_view::size_type; - if (var_size) { - auto tile_num = this->tile_num(); - auto offsets = - (uint64_t*)loaded_metadata_ptr_->tile_min_buffer()[idx].data(); - auto min_offset = offsets[tile_idx]; - auto size = - tile_idx == tile_num - 1 ? - static_cast( - loaded_metadata_ptr_->tile_min_var_buffer()[idx].size() - - min_offset) : - static_cast(offsets[tile_idx + 1] - min_offset); - if (size == 0) { - return {}; - } - - const char* min = - &loaded_metadata_ptr_->tile_min_var_buffer()[idx][min_offset]; - return {min, size}; - } else { - auto size = static_cast(array_schema_->cell_size(name)); - const void* min = - &loaded_metadata_ptr_->tile_min_buffer()[idx][tile_idx * size]; - return {static_cast(min), size}; - } -} - -template -T FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const { - const auto var_size = array_schema_->var_size(name); - if (var_size) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata as wrong type"); - } - - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_ptr_->loaded_metadata().tile_max_[idx]) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata that's not loaded"); - } - - const auto type = array_schema_->type(name); - const auto is_dim = array_schema_->is_dim(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - if (!TileMetadataGenerator::has_min_max_metadata( - type, is_dim, var_size, cell_val_num)) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata that's not 
present"); - } - - auto size = array_schema_->cell_size(name); - const void* max = - &loaded_metadata_ptr_->tile_max_buffer()[idx][tile_idx * size]; - if constexpr (std::is_same_v) { - return max; - } else { - return *static_cast(max); - } -} - -template <> -std::string_view FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const { - const auto type = array_schema_->type(name); - const auto var_size = array_schema_->var_size(name); - if (!var_size && type != Datatype::STRING_ASCII && type != Datatype::CHAR) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata as wrong type"); - } - - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_ptr_->loaded_metadata().tile_max_[idx]) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata that's not loaded"); - } - - const auto is_dim = array_schema_->is_dim(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - if (!TileMetadataGenerator::has_min_max_metadata( - type, is_dim, var_size, cell_val_num)) { - throw FragmentMetadataStatusException( - "Trying to access tile max metadata that's not present"); - } - - using sv_size_cast = std::string_view::size_type; - if (var_size) { - auto tile_num = this->tile_num(); - auto offsets = - (uint64_t*)loaded_metadata_ptr_->tile_max_buffer()[idx].data(); - auto max_offset = offsets[tile_idx]; - auto size = - tile_idx == tile_num - 1 ? 
- static_cast( - loaded_metadata_ptr_->tile_max_var_buffer()[idx].size() - - max_offset) : - static_cast(offsets[tile_idx + 1] - max_offset); - if (size == 0) { - return {}; - } - - const char* max = - &loaded_metadata_ptr_->tile_max_var_buffer()[idx][max_offset]; - return {max, size}; - } else { - auto size = static_cast(array_schema_->cell_size(name)); - const void* max = - &loaded_metadata_ptr_->tile_max_buffer()[idx][tile_idx * size]; - return {static_cast(max), size}; - } -} - -TileMetadata FragmentMetadata::get_tile_metadata( - const std::string& name, const uint64_t tile_idx) const { - auto var_size = array_schema_->var_size(name); - auto is_dim = array_schema_->is_dim(name); - auto count = cell_num(tile_idx); - - if (name == constants::count_of_rows) { - return {count, 0, nullptr, 0, nullptr, 0, nullptr}; - } - - uint64_t null_count = 0; - if (array_schema_->is_nullable(name)) { - null_count = loaded_metadata_ptr_->get_tile_null_count(name, tile_idx); - } - - unsigned dim_idx = 0; - const NDRange* mbr = nullptr; - if (is_dim) { - dim_idx = array_schema_->domain().get_dimension_index(name); - mbr = &loaded_metadata_ptr_->rtree().leaf(tile_idx); - } - - if (var_size) { - std::string_view min = - is_dim ? mbr->at(dim_idx).start_str() : - get_tile_min_as(name, tile_idx); - std::string_view max = - is_dim ? mbr->at(dim_idx).end_str() : - get_tile_max_as(name, tile_idx); - return { - count, - null_count, - min.data(), - min.size(), - max.data(), - max.size(), - nullptr}; - } else { - auto cell_size = array_schema_->cell_size(name); - const void* min = is_dim ? mbr->at(dim_idx).start_fixed() : - get_tile_min_as(name, tile_idx); - const void* max = is_dim ? 
mbr->at(dim_idx).end_fixed() : - get_tile_max_as(name, tile_idx); - - const auto type = array_schema_->type(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - const void* sum = nullptr; - if (TileMetadataGenerator::has_sum_metadata(type, false, cell_val_num)) { - sum = loaded_metadata_ptr_->get_tile_sum(name, tile_idx); - } - - return {count, null_count, min, cell_size, max, cell_size, sum}; - } -} - -void FragmentMetadata::set_processed_conditions( - std::vector& processed_conditions) { - loaded_metadata_ptr_->processed_conditions() = processed_conditions; - loaded_metadata_ptr_->processed_conditions_set() = - std::unordered_set( - processed_conditions.begin(), processed_conditions.end()); -} - uint64_t FragmentMetadata::first_timestamp() const { return timestamp_range_.first; } @@ -2672,27 +2357,6 @@ void FragmentMetadata::write_last_tile_cell_num(Serializer& serializer) const { serializer.write(last_tile_cell_num); } -void FragmentMetadata::store_rtree( - const EncryptionKey& encryption_key, uint64_t* nbytes) { - auto rtree_tile = write_rtree(); - write_generic_tile_to_file(encryption_key, rtree_tile, nbytes); - resources_->stats().add_counter("write_rtree_size", *nbytes); -} - -shared_ptr FragmentMetadata::write_rtree() { - loaded_metadata_ptr_->rtree().build_tree(); - SizeComputationSerializer size_computation_serializer; - loaded_metadata_ptr_->rtree().serialize(size_computation_serializer); - - auto tile{WriterTile::from_generic( - size_computation_serializer.size(), memory_tracker_)}; - - Serializer serializer(tile->data(), tile->size()); - loaded_metadata_ptr_->rtree().serialize(serializer); - - return tile; -} - // ===== FORMAT ===== // null_non_empty_domain(char) // fix-sized: range(void*) @@ -2792,7 +2456,10 @@ void FragmentMetadata::write_footer_to_file(shared_ptr tile) const { } void FragmentMetadata::store_tile_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + 
unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_offsets(idx, size_computation_serializer); @@ -2800,59 +2467,69 @@ void FragmentMetadata::store_tile_offsets( size_computation_serializer.size(), memory_tracker_)}; Serializer serializer(tile->data(), tile->size()); - write_tile_offsets(idx, serializer); + write_tile_offsets(loaded_metatada, idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); resources_->stats().add_counter("write_tile_offsets_size", *nbytes); } void FragmentMetadata::write_tile_offsets( - unsigned idx, Serializer& serializer) { + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write number of tile offsets - uint64_t tile_offsets_num = loaded_metadata_ptr_->tile_offsets()[idx].size(); + uint64_t tile_offsets_num = loaded_metadata->tile_offsets()[idx].size(); serializer.write(tile_offsets_num); // Write tile offsets if (tile_offsets_num != 0) { serializer.write( - &loaded_metadata_ptr_->tile_offsets()[idx][0], + &loaded_metadata->tile_offsets()[idx][0], tile_offsets_num * sizeof(uint64_t)); } } void FragmentMetadata::store_tile_var_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; - write_tile_var_offsets(idx, size_computation_serializer); + write_tile_var_offsets(loaded_metadata, idx, size_computation_serializer); auto tile{WriterTile::from_generic( size_computation_serializer.size(), memory_tracker_)}; Serializer serializer(tile->data(), tile->size()); - write_tile_var_offsets(idx, serializer); + write_tile_var_offsets(loaded_metadata, idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); resources_->stats().add_counter("write_tile_var_offsets_size", *nbytes); } void 
FragmentMetadata::write_tile_var_offsets( - unsigned idx, Serializer& serializer) { + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write tile offsets for each attribute // Write number of offsets uint64_t tile_var_offsets_num = - loaded_metadata_ptr_->tile_var_offsets()[idx].size(); + loaded_metadata->tile_var_offsets()[idx].size(); serializer.write(tile_var_offsets_num); // Write tile offsets if (tile_var_offsets_num != 0) { serializer.write( - &loaded_metadata_ptr_->tile_var_offsets()[idx][0], + &loaded_metadata->tile_var_offsets()[idx][0], tile_var_offsets_num * sizeof(uint64_t)); } } void FragmentMetadata::store_tile_var_sizes( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_var_sizes(idx, size_computation_serializer); @@ -2867,22 +2544,26 @@ void FragmentMetadata::store_tile_var_sizes( } void FragmentMetadata::write_tile_var_sizes( - unsigned idx, Serializer& serializer) { + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write number of sizes - uint64_t tile_var_sizes_num = - loaded_metadata_ptr_->tile_var_sizes()[idx].size(); + uint64_t tile_var_sizes_num = loaded_metadata->tile_var_sizes()[idx].size(); serializer.write(tile_var_sizes_num); // Write tile sizes if (tile_var_sizes_num != 0) { serializer.write( - &loaded_metadata_ptr_->tile_var_sizes()[idx][0], + &loaded_metadata->tile_var_sizes()[idx][0], tile_var_sizes_num * sizeof(uint64_t)); } } void FragmentMetadata::store_tile_validity_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_validity_offsets(idx, size_computation_serializer); @@ -2897,22 
+2578,27 @@ void FragmentMetadata::store_tile_validity_offsets( } void FragmentMetadata::write_tile_validity_offsets( - unsigned idx, Serializer& serializer) { + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write number of tile offsets uint64_t tile_validity_offsets_num = - loaded_metadata_ptr_->tile_validity_offsets()[idx].size(); + loaded_metadata->tile_validity_offsets()[idx].size(); serializer.write(tile_validity_offsets_num); // Write tile validity offsets if (tile_validity_offsets_num != 0) { serializer.write( - &loaded_metadata_ptr_->tile_validity_offsets()[idx][0], + &loaded_metadata->tile_validity_offsets()[idx][0], tile_validity_offsets_num * sizeof(uint64_t)); } } void FragmentMetadata::store_tile_mins( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_mins(idx, size_computation_serializer); @@ -2926,34 +2612,39 @@ void FragmentMetadata::store_tile_mins( resources_->stats().add_counter("write_mins_size", *nbytes); } -void FragmentMetadata::write_tile_mins(unsigned idx, Serializer& serializer) { +void FragmentMetadata::write_tile_mins( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write size of buffer uint64_t tile_mins_buffer_size = - loaded_metadata_ptr_->tile_min_buffer()[idx].size(); + loaded_metadata->tile_min_buffer()[idx].size(); serializer.write(tile_mins_buffer_size); // Write size of buffer var uint64_t tile_mins_var_buffer_size = - loaded_metadata_ptr_->tile_min_var_buffer()[idx].size(); + loaded_metadata->tile_min_var_buffer()[idx].size(); serializer.write(tile_mins_var_buffer_size); // Write tile buffer if (tile_mins_buffer_size != 0) { serializer.write( - &loaded_metadata_ptr_->tile_min_buffer()[idx][0], - tile_mins_buffer_size); + &loaded_metadata->tile_min_buffer()[idx][0], 
tile_mins_buffer_size); } // Write tile var buffer if (tile_mins_var_buffer_size != 0) { serializer.write( - &loaded_metadata_ptr_->tile_min_var_buffer()[idx][0], + &loaded_metadata->tile_min_var_buffer()[idx][0], tile_mins_var_buffer_size); } } void FragmentMetadata::store_tile_maxs( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_maxs(idx, size_computation_serializer); @@ -2967,34 +2658,39 @@ void FragmentMetadata::store_tile_maxs( resources_->stats().add_counter("write_maxs_size", *nbytes); } -void FragmentMetadata::write_tile_maxs(unsigned idx, Serializer& serializer) { +void FragmentMetadata::write_tile_maxs( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write size of buffer uint64_t tile_maxs_buffer_size = - loaded_metadata_ptr_->tile_max_buffer()[idx].size(); + loaded_metadata->tile_max_buffer()[idx].size(); serializer.write(tile_maxs_buffer_size); // Write size of buffer var uint64_t tile_maxs_var_buffer_size = - loaded_metadata_ptr_->tile_max_var_buffer()[idx].size(); + loaded_metadata->tile_max_var_buffer()[idx].size(); serializer.write(tile_maxs_var_buffer_size); // Write tile buffer if (tile_maxs_buffer_size != 0) { serializer.write( - &loaded_metadata_ptr_->tile_max_buffer()[idx][0], - tile_maxs_buffer_size); + &loaded_metadata->tile_max_buffer()[idx][0], tile_maxs_buffer_size); } // Write tile var buffer if (tile_maxs_var_buffer_size != 0) { serializer.write( - &loaded_metadata_ptr_->tile_max_var_buffer()[idx][0], + &loaded_metadata->tile_max_var_buffer()[idx][0], tile_maxs_var_buffer_size); } } void FragmentMetadata::store_tile_sums( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { 
SizeComputationSerializer size_computation_serializer; write_tile_sums(idx, size_computation_serializer); @@ -3008,22 +2704,28 @@ void FragmentMetadata::store_tile_sums( resources_->stats().add_counter("write_sums_size", *nbytes); } -void FragmentMetadata::write_tile_sums(unsigned idx, Serializer& serializer) { +void FragmentMetadata::write_tile_sums( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write number of tile sums uint64_t tile_sums_num = - loaded_metadata_ptr_->tile_sums()[idx].size() / sizeof(uint64_t); + loaded_metadata->tile_sums()[idx].size() / sizeof(uint64_t); serializer.write(tile_sums_num); // Write tile sums if (tile_sums_num != 0) { serializer.write( - loaded_metadata_ptr_->tile_sums()[idx].data(), + loaded_metadata->tile_sums()[idx].data(), tile_sums_num * sizeof(uint64_t)); } } void FragmentMetadata::store_tile_null_counts( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { SizeComputationSerializer size_computation_serializer; write_tile_null_counts(idx, size_computation_serializer); @@ -3038,47 +2740,49 @@ void FragmentMetadata::store_tile_null_counts( } void FragmentMetadata::write_tile_null_counts( - unsigned idx, Serializer& serializer) { + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer) { // Write number of tile null counts uint64_t tile_null_counts_num = - loaded_metadata_ptr_->tile_null_counts()[idx].size(); + loaded_metadata->tile_null_counts()[idx].size(); serializer.write(tile_null_counts_num); // Write tile null counts if (tile_null_counts_num != 0) { serializer.write( - &loaded_metadata_ptr_->tile_null_counts()[idx][0], + &loaded_metadata->tile_null_counts()[idx][0], tile_null_counts_num * sizeof(uint64_t)); } } void FragmentMetadata::store_fragment_min_max_sum_null_count( - uint64_t num, const EncryptionKey& encryption_key, uint64_t* nbytes) { + 
shared_ptr loaded_metadata, + uint64_t num, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { Buffer buff; auto serialize_data = [&](Serializer& serializer) { // Store all attributes. for (unsigned int i = 0; i < num; ++i) { // Store min. - uint64_t min_size = loaded_metadata_ptr_->fragment_mins()[i].size(); + uint64_t min_size = loaded_metadata->fragment_mins()[i].size(); serializer.write(min_size); - serializer.write( - loaded_metadata_ptr_->fragment_mins()[i].data(), min_size); + serializer.write(loaded_metadata->fragment_mins()[i].data(), min_size); // Store max. - uint64_t max_size = loaded_metadata_ptr_->fragment_maxs()[i].size(); + uint64_t max_size = loaded_metadata->fragment_maxs()[i].size(); serializer.write(max_size); - serializer.write( - loaded_metadata_ptr_->fragment_maxs()[i].data(), max_size); + serializer.write(loaded_metadata->fragment_maxs()[i].data(), max_size); // Store sum. - serializer.write(loaded_metadata_ptr_->fragment_sums()[i]); + serializer.write(loaded_metadata->fragment_sums()[i]); // Store null count. - serializer.write( - loaded_metadata_ptr_->fragment_null_counts()[i]); + serializer.write(loaded_metadata->fragment_null_counts()[i]); } }; @@ -3096,14 +2800,15 @@ void FragmentMetadata::store_fragment_min_max_sum_null_count( } void FragmentMetadata::store_processed_conditions( - const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { auto serialize_processed_conditions = [this](Serializer& serializer) { // Store num conditions. 
- uint64_t num = loaded_metadata_ptr_->processed_conditions().size(); + uint64_t num = loaded_metadata->processed_conditions().size(); serializer.write(num); - for (auto& processed_condition : - loaded_metadata_ptr_->processed_conditions()) { + for (auto& processed_condition : loaded_metadata->processed_conditions()) { uint64_t size = processed_condition.size(); serializer.write(size); @@ -3123,312 +2828,6 @@ void FragmentMetadata::store_processed_conditions( resources_->stats().add_counter("write_processed_conditions_size", *nbytes); } -template -void FragmentMetadata::compute_fragment_min_max_sum(const std::string& name) { - // For easy reference. - const auto& idx = idx_map_[name]; - const auto nullable = array_schema_->is_nullable(name); - const auto is_dim = array_schema_->is_dim(name); - const auto type = array_schema_->type(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - - // No metadata for dense coords - if (!array_schema_->dense() || !is_dim) { - const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( - type, is_dim, false, cell_val_num); - const auto has_sum = - TileMetadataGenerator::has_sum_metadata(type, false, cell_val_num); - - if (has_min_max) { - // Initialize defaults. - T min = metadata_generator_type_data::min; - T max = metadata_generator_type_data::max; - - // Get data and tile num. - auto min_values = static_cast(static_cast( - loaded_metadata_ptr_->tile_min_buffer()[idx].data())); - auto max_values = static_cast(static_cast( - loaded_metadata_ptr_->tile_max_buffer()[idx].data())); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile. - for (uint64_t t = 0; t < tile_num; t++) { - const bool is_null = nullable && null_count_values[t] == cell_num(t); - if (!is_null) { - min = min < min_values[t] ? min : min_values[t]; - max = max > max_values[t] ? max : max_values[t]; - } - } - - // Copy min max values. 
- loaded_metadata_ptr_->fragment_mins()[idx].resize(sizeof(T)); - loaded_metadata_ptr_->fragment_maxs()[idx].resize(sizeof(T)); - memcpy( - loaded_metadata_ptr_->fragment_mins()[idx].data(), &min, sizeof(T)); - memcpy( - loaded_metadata_ptr_->fragment_maxs()[idx].data(), &max, sizeof(T)); - } - - if (has_sum) { - compute_fragment_sum::sum_type>( - idx, nullable); - } - } -} - -template <> -void FragmentMetadata::compute_fragment_min_max_sum( - const std::string& name) { - // For easy reference. - const auto idx = idx_map_[name]; - const auto nullable = array_schema_->is_nullable(name); - const auto is_dim = array_schema_->is_dim(name); - const auto type = array_schema_->type(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - - // Return if there's no min/max. - const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( - type, is_dim, false, cell_val_num); - if (!has_min_max) - return; - - // Initialize to null. - void* min = nullptr; - void* max = nullptr; - - // Get data and tile num. - auto min_values = loaded_metadata_ptr_->tile_min_buffer()[idx].data(); - auto max_values = loaded_metadata_ptr_->tile_max_buffer()[idx].data(); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile. - for (uint64_t t = 0; t < tile_num; t++) { - if (!nullable || null_count_values[t] != cell_num(t)) { - min = (min == nullptr || - strncmp((const char*)min, (const char*)min_values, cell_val_num) > - 0) ? - min_values : - min; - min_values += cell_val_num; - max = (max == nullptr || - strncmp((const char*)max, (const char*)max_values, cell_val_num) < - 0) ? - max_values : - max; - max_values += cell_val_num; - } - } - - // Copy values. 
- if (min != nullptr) { - loaded_metadata_ptr_->fragment_mins()[idx].resize(cell_val_num); - memcpy( - loaded_metadata_ptr_->fragment_mins()[idx].data(), min, cell_val_num); - } - - if (max != nullptr) { - loaded_metadata_ptr_->fragment_maxs()[idx].resize(cell_val_num); - memcpy( - loaded_metadata_ptr_->fragment_maxs()[idx].data(), max, cell_val_num); - } -} - -template <> -void FragmentMetadata::compute_fragment_sum( - const uint64_t idx, const bool nullable) { - // Zero sum. - int64_t sum_data = 0; - - // Get data and tile num. - auto values = static_cast( - static_cast(loaded_metadata_ptr_->tile_sums()[idx].data())); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile, swallowing overflow exception. - for (uint64_t t = 0; t < tile_num; t++) { - if (!nullable || null_count_values[t] != cell_num(t)) { - if (sum_data > 0 && values[t] > 0 && - (sum_data > std::numeric_limits::max() - values[t])) { - sum_data = std::numeric_limits::max(); - break; - } - - if (sum_data < 0 && values[t] < 0 && - (sum_data < std::numeric_limits::min() - values[t])) { - sum_data = std::numeric_limits::min(); - break; - } - - sum_data += values[t]; - } - } - - // Copy value. - memcpy( - &loaded_metadata_ptr_->fragment_sums()[idx], &sum_data, sizeof(int64_t)); -} - -template <> -void FragmentMetadata::compute_fragment_sum( - const uint64_t idx, const bool nullable) { - // Zero sum. - uint64_t sum_data = 0; - - // Get data and tile num. - auto values = static_cast( - static_cast(loaded_metadata_ptr_->tile_sums()[idx].data())); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile, swallowing overflow exception. 
- for (uint64_t t = 0; t < tile_num; t++) { - if (!nullable || null_count_values[t] != cell_num(t)) { - if (sum_data > std::numeric_limits::max() - values[t]) { - sum_data = std::numeric_limits::max(); - break; - } - - sum_data += values[t]; - } - } - - // Copy value. - memcpy( - &loaded_metadata_ptr_->fragment_sums()[idx], &sum_data, sizeof(uint64_t)); -} - -template <> -void FragmentMetadata::compute_fragment_sum( - const uint64_t idx, const bool nullable) { - // Zero sum. - double sum_data = 0; - - // Get data and tile num. - auto values = static_cast( - static_cast(loaded_metadata_ptr_->tile_sums()[idx].data())); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile, swallowing overflow exception. - for (uint64_t t = 0; t < tile_num; t++) { - if (!nullable || null_count_values[t] != cell_num(t)) { - if ((sum_data < 0.0) == (values[t] < 0.0) && - std::abs(sum_data) > - std::numeric_limits::max() - std::abs(values[t])) { - sum_data = sum_data < 0.0 ? std::numeric_limits::lowest() : - std::numeric_limits::max(); - break; - } - - sum_data += values[t]; - } - } - - // Copy value. - memcpy( - &loaded_metadata_ptr_->fragment_sums()[idx], &sum_data, sizeof(double)); -} - -void FragmentMetadata::min_max_var(const std::string& name) { - // For easy reference. - const auto nullable = array_schema_->is_nullable(name); - const auto is_dim = array_schema_->is_dim(name); - const auto type = array_schema_->type(name); - const auto cell_val_num = array_schema_->cell_val_num(name); - const auto idx = idx_map_[name]; - - // Return if there's no min/max. - const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( - type, is_dim, true, cell_val_num); - if (!has_min_max) - return; - - // Initialize to null. - void* min = nullptr; - void* max = nullptr; - uint64_t min_size = 0; - uint64_t max_size = 0; - - // Get data and tile num. 
- auto min_offsets = static_cast( - static_cast(loaded_metadata_ptr_->tile_min_buffer()[idx].data())); - auto max_offsets = static_cast( - static_cast(loaded_metadata_ptr_->tile_max_buffer()[idx].data())); - auto min_values = loaded_metadata_ptr_->tile_min_var_buffer()[idx].data(); - auto max_values = loaded_metadata_ptr_->tile_max_var_buffer()[idx].data(); - auto& null_count_values = loaded_metadata_ptr_->tile_null_counts()[idx]; - auto tile_num = this->tile_num(); - - // Process tile by tile. - for (uint64_t t = 0; t < tile_num; t++) { - if (!nullable || null_count_values[t] != cell_num(t)) { - auto min_value = min_values + min_offsets[t]; - auto min_value_size = - t == tile_num - 1 ? - loaded_metadata_ptr_->tile_min_var_buffer()[idx].size() - - min_offsets[t] : - min_offsets[t + 1] - min_offsets[t]; - auto max_value = max_values + max_offsets[t]; - auto max_value_size = - t == tile_num - 1 ? - loaded_metadata_ptr_->tile_max_var_buffer()[idx].size() - - max_offsets[t] : - max_offsets[t + 1] - max_offsets[t]; - if (min == nullptr && max == nullptr) { - min = min_value; - min_size = min_value_size; - max = max_value; - max_size = max_value_size; - } else { - // Process min. - size_t min_cmp_size = std::min(min_size, min_value_size); - int cmp = - strncmp(static_cast(min), min_value, min_cmp_size); - if (cmp != 0) { - if (cmp > 0) { - min = min_value; - min_size = min_value_size; - } - } else { - if (min_value_size < min_size) { - min = min_value; - min_size = min_value_size; - } - } - - // Process max. - size_t max_cmp_size = std::min(max_size, max_value_size); - cmp = strncmp(static_cast(max), max_value, max_cmp_size); - if (cmp != 0) { - if (cmp < 0) { - max = max_value; - max_size = max_value_size; - } - } else { - if (max_value_size > max_size) { - max = max_value; - max_size = max_value_size; - } - } - } - } - } - - // Copy values. 
- if (min != nullptr) { - loaded_metadata_ptr_->fragment_mins()[idx].resize(min_size); - memcpy(loaded_metadata_ptr_->fragment_mins()[idx].data(), min, min_size); - } - - if (max != nullptr) { - loaded_metadata_ptr_->fragment_maxs()[idx].resize(max_size); - memcpy(loaded_metadata_ptr_->fragment_maxs()[idx].data(), max, max_size); - } -} - void FragmentMetadata::write_version(Serializer& serializer) const { serializer.write(version_); } @@ -3537,53 +2936,5 @@ FragmentMetadata::compute_overlapping_tile_ids_cov( template std::vector> FragmentMetadata::compute_overlapping_tile_ids_cov( const double* subarray) const; -template int8_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template uint8_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template int16_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template uint16_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template int32_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template uint32_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template int64_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template char FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template uint64_t FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template float FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template double FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template std::byte FragmentMetadata::get_tile_min_as( - const std::string& name, uint64_t tile_idx) const; -template int8_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; 
-template uint8_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template int16_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template uint16_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template int32_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template uint32_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template int64_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template uint64_t FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template float FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template double FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template std::byte FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; -template char FragmentMetadata::get_tile_max_as( - const std::string& name, uint64_t tile_idx) const; } // namespace tiledb::sm diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 931d6d0099f..3a01853f5b5 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -402,36 +402,82 @@ class FragmentMetadata { const std::unordered_map>& offsets); - /** Stores all the metadata to storage. */ - void store(const EncryptionKey& encryption_key); + /** + * Writes the R-tree to a tile. + * @param loaded_metadata The loaded fragment metadata. + */ + shared_ptr write_rtree( + shared_ptr loaded_metadata); + + /** + * Writes the R-tree to storage. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. + * @param nbytes The total number of bytes written for the R-tree. 
+ */ + void store_rtree( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key, + uint64_t* nbytes); + + /** + * Stores all the metadata to storage. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. + */ + void store( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key); /** * Stores all the metadata to storage. * * Applicable to format versions 7 to 10. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. */ - void store_v7_v10(const EncryptionKey& encryption_key); + void store_v7_v10( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key); /** * Stores all the metadata to storage. * * Applicable to format versions 11. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. */ - void store_v11(const EncryptionKey& encryption_key); + void store_v11( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key); /** * Stores all the metadata to storage. * * Applicable to format versions 12 or higher. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. */ - void store_v12_v14(const EncryptionKey& encryption_key); + void store_v12_v14( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key); /** * Stores all the metadata to storage. * * Applicable to format versions 15 or higher. + * + * @param loaded_metadata The loaded fragment metadata. + * @param encryption_key The encryption key. */ - void store_v15_or_higher(const EncryptionKey& encryption_key); + void store_v15_or_higher( + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key); /** * Simply sets the number of cells for the last tile. @@ -441,15 +487,6 @@ class FragmentMetadata { */ void set_last_tile_cell_num(uint64_t cell_num); - /** - * Sets the input tile's MBR in the fragment metadata. 
It also expands the - * non-empty domain of the fragment. - * - * @param tile The tile index whose MBR will be set. - * @param mbr The MBR to be set. - */ - void set_mbr(uint64_t tile, const NDRange& mbr); - /** * Resizes the per-tile metadata vectors for the given number of tiles. This * is not serialized, and is only used during writes. @@ -477,9 +514,14 @@ class FragmentMetadata { * @param tid The index of the tile for which the offset is set. * @param step This is essentially the step by which the previous * offset will be expanded. It is practically the last tile size. + * @param tile_offsets The tile offsets to set. * @return void */ - void set_tile_offset(const std::string& name, uint64_t tid, uint64_t step); + void set_tile_offset( + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_offsets); /** * Sets a variable tile offset for the input attribute or dimension. @@ -488,10 +530,14 @@ class FragmentMetadata { * @param tid The index of the tile for which the offset is set. * @param step This is essentially the step by which the previous * offset will be expanded. It is practically the last variable tile size. + * @param tile_var_offsets The variable tile offsets to set. * @return void */ void set_tile_var_offset( - const std::string& name, uint64_t tid, uint64_t step); + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_var_offsets); /** * Sets a variable tile size for the input attribute or dimension. @@ -499,9 +545,14 @@ class FragmentMetadata { * @param name The attribute/dimension for which the size is set. * @param tid The index of the tile for which the offset is set. * @param size The size to be appended. + * @param tile_var_sizes The variable tile sizes to set. 
* @return void */ - void set_tile_var_size(const std::string& name, uint64_t tid, uint64_t size); + void set_tile_var_size( + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_var_sizes); /** * Sets a validity tile offset for the input attribute. @@ -510,10 +561,14 @@ class FragmentMetadata { * @param tid The index of the tile for which the offset is set. * @param step This is essentially the step by which the previous * offset will be expanded. It is practically the last tile size. + * @param tile_validity_offsets The validity tile offsets to set. * @return void */ void set_tile_validity_offset( - const std::string& name, uint64_t tid, uint64_t step); + const std::string& name, + uint64_t tid, + uint64_t step, + tdb::pmr::vector>& tile_validity_offsets); /** * Sets a tile min for the fixed input attribute. @@ -521,9 +576,14 @@ class FragmentMetadata { * @param name The attribute for which the min is set. * @param tid The index of the tile for which the min is set. * @param min The minimum. + * @param tile_min_buffer The tile min buffer. * @return void */ - void set_tile_min(const std::string& name, uint64_t tid, const ByteVec& min); + void set_tile_min( + const std::string& name, + uint64_t tid, + const ByteVec& min, + tdb::pmr::vector>& tile_min_buffer); /** * Sets a tile min size for the var input attribute. @@ -531,10 +591,14 @@ class FragmentMetadata { * @param name The attribute for which the min size is set. * @param tid The index of the tile for which the min is set. * @param size The size. + * @param tile_min_buffer The tile min buffer. * @return void */ void set_tile_min_var_size( - const std::string& name, uint64_t tid, uint64_t size); + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_min_buffer); /** * Sets a tile min for the var input attribute. @@ -542,10 +606,16 @@ class FragmentMetadata { * @param name The attribute for which the min is set. 
* @param tid The index of the tile for which the min is set. * @param min The minimum. + * @param tile_min_buffer The tile min buffer. + * @param tile_min_var_buffer The tile min var buffer. * @return void */ void set_tile_min_var( - const std::string& name, uint64_t tid, const ByteVec& min); + const std::string& name, + uint64_t tid, + const ByteVec& min, + tdb::pmr::vector>& tile_min_buffer, + tdb::pmr::vector>& tile_min_var_buffer); /** * Sets a tile max for the input attribute. @@ -553,9 +623,14 @@ class FragmentMetadata { * @param name The attribute for which the max is set. * @param tid The index of the tile for which the max is set. * @param max The maximum. + * @param tile_max_buffer The tile max buffer. * @return void */ - void set_tile_max(const std::string& name, uint64_t tid, const ByteVec& max); + void set_tile_max( + const std::string& name, + uint64_t tid, + const ByteVec& max, + tdb::pmr::vector>& tile_max_buffer); /** * Sets a tile max for the var input attribute. @@ -563,10 +638,14 @@ class FragmentMetadata { * @param name The attribute for which the min size is set. * @param tid The index of the tile for which the min is set. * @param size The size. + * @param tile_max_buffer The tile max buffer. * @return void */ void set_tile_max_var_size( - const std::string& name, uint64_t tid, uint64_t size); + const std::string& name, + uint64_t tid, + uint64_t size, + tdb::pmr::vector>& tile_max_buffer); /** * Sets a tile max for the var input attribute. @@ -574,18 +653,33 @@ class FragmentMetadata { * @param name The attribute for which the min is set. * @param tid The index of the tile for which the min is set. * @param max The maximum. + * @param tile_max_buffer The tile max buffer. + * @param tile_max_var_buffer The tile max var buffer. 
* @return void */ void set_tile_max_var( - const std::string& name, uint64_t tid, const ByteVec& max); + const std::string& name, + uint64_t tid, + const ByteVec& max, + tdb::pmr::vector>& tile_max_buffer, + tdb::pmr::vector>& tile_max_var_buffer); /** * Converts min/max sizes to offsets. * * @param name The attribute for which the offsets are converted + * @param tile_min_var_buffer The tile min var buffer. + * @param tile_min_buffer The tile min buffer. + * @param tile_max_var_buffer The tile max var buffer. + * @param tile_max_buffer The tile max buffer. * @return void */ - void convert_tile_min_max_var_sizes_to_offsets(const std::string& name); + void convert_tile_min_max_var_sizes_to_offsets( + const std::string& name, + tdb::pmr::vector>& tile_min_var_buffer, + tdb::pmr::vector>& tile_min_buffer, + tdb::pmr::vector>& tile_max_var_buffer, + tdb::pmr::vector>& tile_max_buffer); /** * Sets a tile sum for the input attribute. @@ -593,9 +687,14 @@ class FragmentMetadata { * @param name The attribute for which the sum is set. * @param tid The index of the tile for which the sum is set. * @param sum The sum. + * @param tile_sums The tile sums to set. * @return void */ - void set_tile_sum(const std::string& name, uint64_t tid, const ByteVec& sum); + void set_tile_sum( + const std::string& name, + uint64_t tid, + const ByteVec& sum, + tdb::pmr::vector>& tile_sums); /** * Sets a tile null count for the input attribute. @@ -603,15 +702,14 @@ class FragmentMetadata { * @param name The attribute for which the null count is set. * @param tid The index of the tile for which the null count is set. * @param sum The null count. + * @param tile_null_counts The tile null counts to set. * @return void */ void set_tile_null_count( - const std::string& name, uint64_t tid, uint64_t null_count); - - /** - * Compute fragment min, max, sum, null count for all dimensions/attributes. 
- */ - void compute_fragment_min_max_sum_null_count(); + const std::string& name, + uint64_t tid, + uint64_t null_count, + tdb::pmr::vector>& tile_null_counts); /** * Sets array schema pointer. @@ -644,12 +742,6 @@ class FragmentMetadata { uint64_t footer_size() const; - /** Returns the MBR of the input tile. */ - const NDRange& mbr(uint64_t tile_idx) const; - - /** Returns all the MBRs of all tiles in the fragment. */ - const tdb::pmr::vector& mbrs() const; - /** * Returns the (uncompressed) tile size for a given attribute or dimension * and tile index. If the attribute/dimension is var-sized, this will return @@ -661,47 +753,6 @@ class FragmentMetadata { */ uint64_t tile_size(const std::string& name, uint64_t tile_idx) const; - /** - * Retrieves the tile min value for a given attribute or dimension and tile - * index. - * - * @param name The input attribute/dimension. - * @param tile_idx The index of the tile in the metadata. - * @return Value. - */ - template - T get_tile_min_as(const std::string& name, uint64_t tile_idx) const; - - /** - * Retrieves the tile max value for a given attribute or dimension and tile - * index. - * - * @tparam Type to return the data as. - * @param name The input attribute/dimension. - * @param tile_idx The index of the tile in the metadata. - * @return Value. - */ - template - T get_tile_max_as(const std::string& name, uint64_t tile_idx) const; - - /** - * Returns the tile metadata for a tile. - * - * @param name Name of the attribute to get the data for. - * @param tile_idx Tile index. - */ - TileMetadata get_tile_metadata( - const std::string& name, const uint64_t tile_idx) const; - - /** - * Set the processed conditions. The processed conditions is the list - * of delete/update conditions that have already been applied for this - * fragment and don't need to be applied again. - * - * @param processed_conditions The processed conditions. 
- */ - void set_processed_conditions(std::vector& processed_conditions); - /** Returns the first timestamp of the fragment timestamp range. */ uint64_t first_timestamp() const; @@ -1189,192 +1240,220 @@ class FragmentMetadata { */ void write_has_delete_meta(Serializer& serializer) const; - /** - * Writes the R-tree to storage. - * - * @param encryption_key The encryption key. - * @param nbytes The total number of bytes written for the R-tree. - */ - void store_rtree(const EncryptionKey& encryption_key, uint64_t* nbytes); - /** Stores a footer with the basic information. */ void store_footer(const EncryptionKey& encryption_key); - /** Writes the R-tree to a tile. */ - shared_ptr write_rtree(); - /** Writes the non-empty domain to the input buffer. */ void write_non_empty_domain(Serializer& serializer) const; /** * Writes the tile offsets of the input attribute or dimension to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile offsets. */ void store_tile_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the tile offsets of the input attribute or dimension idx to the * input buffer. */ - void write_tile_offsets(unsigned idx, Serializer& serializer); + void write_tile_offsets( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the variable tile offsets of the input attribute or dimension * to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile var offsets. 
*/ void store_tile_var_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the variable tile offsets of the input attribute or dimension idx * to the buffer. */ - void write_tile_var_offsets(unsigned idx, Serializer& serializer); + void write_tile_var_offsets( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the variable tile sizes for the input attribute or dimension to * the buffer. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute or dimension. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the tile var sizes. */ void store_tile_var_sizes( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the variable tile sizes for the input attribute or dimension * to storage. */ - void write_tile_var_sizes(unsigned idx, Serializer& serializer); + void write_tile_var_sizes( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the validity tile offsets of the input attribute to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the validity tile * offsets. */ void store_tile_validity_offsets( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the validity tile offsets of the input attribute idx to the * input buffer. 
*/ - void write_tile_validity_offsets(unsigned idx, Serializer& serializer); + void write_tile_validity_offsets( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the mins of the input attribute to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the mins. */ void store_tile_mins( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the mins of the input attribute idx to the input buffer. */ - void write_tile_mins(unsigned idx, Serializer& serializer); + void write_tile_mins( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the maxs of the input attribute to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the maxs. */ void store_tile_maxs( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the maxs of the input attribute idx to the input buffer. */ - void write_tile_maxs(unsigned idx, Serializer& serializer); + void write_tile_maxs( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the sums of the input attribute to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the sums. 
*/ void store_tile_sums( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the sums of the input attribute idx to the input buffer. */ - void write_tile_sums(unsigned idx, Serializer& serializer); + void write_tile_sums( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the null counts of the input attribute to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param idx The index of the attribute. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written for the null counts. */ void store_tile_null_counts( - unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + unsigned idx, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the null counts of the input attribute idx to the input buffer. */ - void write_tile_null_counts(unsigned idx, Serializer& serializer); + void write_tile_null_counts( + shared_ptr loaded_metadata, + unsigned idx, + Serializer& serializer); /** * Writes the fragment min, max, sum and null count to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param num The number of attributes. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written. */ void store_fragment_min_max_sum_null_count( - uint64_t num, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr loaded_metadata, + uint64_t num, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Writes the processed conditions to storage. * + * @param loaded_metadata The loaded fragment metadata. * @param encryption_key The encryption key. * @param nbytes The total number of bytes written. 
*/ void store_processed_conditions( - const EncryptionKey& encryption_key, uint64_t* nbytes); - - /** - * Compute the fragment min, max and sum values. - * - * @param name The attribute/dimension name. - */ - template - void compute_fragment_min_max_sum(const std::string& name); - - /** - * Compute the fragment sum value. - * - * @param idx The attribute/dimension index. - * @param nullable Is the attribute/dimension nullable. - */ - template - void compute_fragment_sum(const uint64_t idx, const bool nullable); - - /** - * Compute the fragment min and max values for var sized attributes. - * - * @param name The attribute/dimension name. - */ - void min_max_var(const std::string& name); + shared_ptr loaded_metadata, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** Writes the format version to the buffer. */ void write_version(Serializer& serializer) const; diff --git a/tiledb/sm/fragment/loaded_fragment_metadata.cc b/tiledb/sm/fragment/loaded_fragment_metadata.cc index b4e2776038f..656545e5fca 100644 --- a/tiledb/sm/fragment/loaded_fragment_metadata.cc +++ b/tiledb/sm/fragment/loaded_fragment_metadata.cc @@ -538,8 +538,699 @@ void LoadedFragmentMetadata::free_rtree() { loaded_metadata_.rtree_ = false; } +void LoadedFragmentMetadata::set_mbr( + uint64_t base, uint64_t tile, const NDRange& mbr) { + rtree_.set_leaf(tile + base, mbr); +} + +template +T LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const { + const auto var_size = parent_fragment_.array_schema_->var_size(name); + if (var_size) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata as wrong type"); + } + + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!loaded_metadata_.tile_min_[idx]) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata that's not loaded"); + } + + const auto type = 
parent_fragment_.array_schema_->type(name); + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + if (!TileMetadataGenerator::has_min_max_metadata( + type, is_dim, var_size, cell_val_num)) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata that's not present"); + } + + auto size = parent_fragment_.array_schema_->cell_size(name); + const void* min = &tile_min_buffer()[idx][tile_idx * size]; + if constexpr (std::is_same_v) { + return min; + } else { + return *static_cast(min); + } +} + +template <> +std::string_view LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const { + const auto type = parent_fragment_.array_schema_->type(name); + const auto var_size = parent_fragment_.array_schema_->var_size(name); + if (!var_size && type != Datatype::STRING_ASCII && type != Datatype::CHAR) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata as wrong type"); + } + + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!loaded_metadata_.tile_min_[idx]) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata that's not loaded"); + } + + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + if (!TileMetadataGenerator::has_min_max_metadata( + type, is_dim, var_size, cell_val_num)) { + throw FragmentMetadataStatusException( + "Trying to access tile min metadata that's not present"); + } + + using sv_size_cast = std::string_view::size_type; + if (var_size) { + auto tile_num = this->tile_num(); + auto offsets = (uint64_t*)tile_min_buffer_[idx].data(); + auto min_offset = offsets[tile_idx]; + auto size = + tile_idx == tile_num - 1 ? 
+ static_cast( + tile_min_var_buffer_[idx].size() - min_offset) : + static_cast(offsets[tile_idx + 1] - min_offset); + if (size == 0) { + return {}; + } + + const char* min = &tile_min_var_buffer_[idx][min_offset]; + return {min, size}; + } else { + auto size = static_cast( + parent_fragment_.array_schema_->cell_size(name)); + const void* min = &tile_min_buffer_[idx][tile_idx * size]; + return {static_cast(min), size}; + } +} + +TileMetadata LoadedFragmentMetadata::get_tile_metadata( + const std::string& name, const uint64_t tile_idx) const { + auto var_size = parent_fragment_.array_schema_->var_size(name); + auto is_dim = parent_fragment_.array_schema_->is_dim(name); + auto count = parent_fragment_.cell_num(tile_idx); + + if (name == constants::count_of_rows) { + return {count, 0, nullptr, 0, nullptr, 0, nullptr}; + } + + uint64_t null_count = 0; + if (parent_fragment_.array_schema_->is_nullable(name)) { + null_count = get_tile_null_count(name, tile_idx); + } + + unsigned dim_idx = 0; + const NDRange* mbr = nullptr; + if (is_dim) { + dim_idx = + parent_fragment_.array_schema_->domain().get_dimension_index(name); + mbr = &rtree().leaf(tile_idx); + } + + if (var_size) { + std::string_view min = + is_dim ? mbr->at(dim_idx).start_str() : + get_tile_min_as(name, tile_idx); + std::string_view max = + is_dim ? mbr->at(dim_idx).end_str() : + get_tile_max_as(name, tile_idx); + return { + count, + null_count, + min.data(), + min.size(), + max.data(), + max.size(), + nullptr}; + } else { + auto cell_size = parent_fragment_.array_schema_->cell_size(name); + const void* min = is_dim ? mbr->at(dim_idx).start_fixed() : + get_tile_min_as(name, tile_idx); + const void* max = is_dim ? 
mbr->at(dim_idx).end_fixed() : + get_tile_max_as(name, tile_idx); + + const auto type = parent_fragment_.array_schema_->type(name); + const auto cell_val_num = + parent_fragment_.array_schema_->cell_val_num(name); + const void* sum = nullptr; + if (TileMetadataGenerator::has_sum_metadata(type, false, cell_val_num)) { + sum = get_tile_sum(name, tile_idx); + } + + return {count, null_count, min, cell_size, max, cell_size, sum}; + } +} +template +T LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const { + const auto var_size = parent_fragment_.array_schema_->var_size(name); + if (var_size) { + throw FragmentMetadataStatusException( + "Trying to access tile max metadata as wrong type"); + } + + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!loaded_metadata_.tile_max_[idx]) { + throw FragmentMetadataStatusException( + "Trying to access tile max metadata that's not loaded"); + } + + const auto type = parent_fragment_.array_schema_->type(name); + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + if (!TileMetadataGenerator::has_min_max_metadata( + type, is_dim, var_size, cell_val_num)) { + throw FragmentMetadataStatusException( + "Trying to access tile max metadata that's not present"); + } + + auto size = parent_fragment_.array_schema_->cell_size(name); + const void* max = &tile_max_buffer_[idx][tile_idx * size]; + if constexpr (std::is_same_v) { + return max; + } else { + return *static_cast(max); + } +} + +template <> +std::string_view LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const { + const auto type = parent_fragment_.array_schema_->type(name); + const auto var_size = parent_fragment_.array_schema_->var_size(name); + if (!var_size && type != Datatype::STRING_ASCII && type != Datatype::CHAR) { + throw 
FragmentMetadataStatusException( + "Trying to access tile max metadata as wrong type"); + } + + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!loaded_metadata_.tile_max_[idx]) { + throw FragmentMetadataStatusException( + "Trying to access tile max metadata that's not loaded"); + } + + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + if (!TileMetadataGenerator::has_min_max_metadata( + type, is_dim, var_size, cell_val_num)) { + throw FragmentMetadataStatusException( + "Trying to access tile max metadata that's not present"); + } + + using sv_size_cast = std::string_view::size_type; + if (var_size) { + auto tile_num = parent_fragment_.tile_num(); + auto offsets = (uint64_t*)tile_max_buffer_[idx].data(); + auto max_offset = offsets[tile_idx]; + auto size = + tile_idx == tile_num - 1 ? + static_cast( + tile_max_var_buffer_[idx].size() - max_offset) : + static_cast(offsets[tile_idx + 1] - max_offset); + if (size == 0) { + return {}; + } + + const char* max = &tile_max_var_buffer_[idx][max_offset]; + return {max, size}; + } else { + auto size = static_cast( + parent_fragment_.array_schema_->cell_size(name)); + const void* max = &tile_max_buffer_[idx][tile_idx * size]; + return {static_cast(max), size}; + } +} + +void LoadedFragmentMetadata::set_processed_conditions( + std::vector& processed_conditions) { + processed_conditions_ = processed_conditions; + processed_conditions_set_ = std::unordered_set( + processed_conditions.begin(), processed_conditions.end()); +} + +template +void LoadedFragmentMetadata::compute_fragment_min_max_sum( + const std::string& name) { + // For easy reference. 
+ const auto& idx = parent_fragment_.idx_map_[name]; + const auto nullable = parent_fragment_.array_schema_->is_nullable(name); + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto type = parent_fragment_.array_schema_->type(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + + // No metadata for dense coords + if (!parent_fragment_.array_schema_->dense() || !is_dim) { + const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( + type, is_dim, false, cell_val_num); + const auto has_sum = + TileMetadataGenerator::has_sum_metadata(type, false, cell_val_num); + + if (has_min_max) { + // Initialize defaults. + T min = metadata_generator_type_data::min; + T max = metadata_generator_type_data::max; + + // Get data and tile num. + auto min_values = + static_cast(static_cast(tile_min_buffer()[idx].data())); + auto max_values = + static_cast(static_cast(tile_max_buffer()[idx].data())); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = this->tile_num(); + + // Process tile by tile. + for (uint64_t t = 0; t < tile_num; t++) { + const bool is_null = nullable && null_count_values[t] == cell_num(t); + if (!is_null) { + min = min < min_values[t] ? min : min_values[t]; + max = max > max_values[t] ? max : max_values[t]; + } + } + + // Copy min max values. + fragment_mins()[idx].resize(sizeof(T)); + fragment_maxs()[idx].resize(sizeof(T)); + memcpy(fragment_mins()[idx].data(), &min, sizeof(T)); + memcpy(fragment_maxs()[idx].data(), &max, sizeof(T)); + } + + if (has_sum) { + compute_fragment_sum::sum_type>( + idx, nullable); + } + } +} + +template <> +void LoadedFragmentMetadata::compute_fragment_min_max_sum( + const std::string& name) { + // For easy reference. 
+ const auto idx = parent_fragment_.idx_map_[name]; + const auto nullable = parent_fragment_.array_schema_->is_nullable(name); + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto type = parent_fragment_.array_schema_->type(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + + // Return if there's no min/max. + const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( + type, is_dim, false, cell_val_num); + if (!has_min_max) + return; + + // Initialize to null. + void* min = nullptr; + void* max = nullptr; + + // Get data and tile num. + auto min_values = tile_min_buffer()[idx].data(); + auto max_values = tile_max_buffer()[idx].data(); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = parent_fragment_.tile_num(); + + // Process tile by tile. + for (uint64_t t = 0; t < tile_num; t++) { + if (!nullable || null_count_values[t] != cell_num(t)) { + min = (min == nullptr || + strncmp((const char*)min, (const char*)min_values, cell_val_num) > + 0) ? + min_values : + min; + max = (max == nullptr || + strncmp((const char*)max, (const char*)max_values, cell_val_num) < + 0) ? + max_values : + max; + } + min_values += cell_val_num; + max_values += cell_val_num; + } + + // Copy values. + if (min != nullptr) { + fragment_mins()[idx].resize(cell_val_num); + memcpy(fragment_mins()[idx].data(), min, cell_val_num); + } + + if (max != nullptr) { + fragment_maxs()[idx].resize(cell_val_num); + memcpy(fragment_maxs()[idx].data(), max, cell_val_num); + } +} + +template <> +void LoadedFragmentMetadata::compute_fragment_sum( + const uint64_t idx, const bool nullable) { + // Zero sum. + int64_t sum_data = 0; + + // Get data and tile num. + auto values = + static_cast(static_cast(tile_sums()[idx].data())); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = parent_fragment_.tile_num(); + + // Process tile by tile, swallowing overflow exception. 
+ for (uint64_t t = 0; t < tile_num; t++) { + if (!nullable || null_count_values[t] != cell_num(t)) { + if (sum_data > 0 && values[t] > 0 && + (sum_data > std::numeric_limits::max() - values[t])) { + sum_data = std::numeric_limits::max(); + break; + } + + if (sum_data < 0 && values[t] < 0 && + (sum_data < std::numeric_limits::min() - values[t])) { + sum_data = std::numeric_limits::min(); + break; + } + + sum_data += values[t]; + } + } + + // Copy value. + memcpy(&fragment_sums()[idx], &sum_data, sizeof(int64_t)); +} + +template <> +void LoadedFragmentMetadata::compute_fragment_sum( + const uint64_t idx, const bool nullable) { + // Zero sum. + uint64_t sum_data = 0; + + // Get data and tile num. + auto values = + static_cast(static_cast(tile_sums()[idx].data())); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = parent_fragment_.tile_num(); + + // Process tile by tile, swallowing overflow exception. + for (uint64_t t = 0; t < tile_num; t++) { + if (!nullable || null_count_values[t] != cell_num(t)) { + if (sum_data > std::numeric_limits::max() - values[t]) { + sum_data = std::numeric_limits::max(); + break; + } + + sum_data += values[t]; + } + } + + // Copy value. + memcpy(&fragment_sums()[idx], &sum_data, sizeof(uint64_t)); +} + +template <> +void LoadedFragmentMetadata::compute_fragment_sum( + const uint64_t idx, const bool nullable) { + // Zero sum. + double sum_data = 0; + + // Get data and tile num. + auto values = + static_cast(static_cast(tile_sums()[idx].data())); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = parent_fragment_.tile_num(); + + // Process tile by tile, swallowing overflow exception. + for (uint64_t t = 0; t < tile_num; t++) { + if (!nullable || null_count_values[t] != cell_num(t)) { + if ((sum_data < 0.0) == (values[t] < 0.0) && + std::abs(sum_data) > + std::numeric_limits::max() - std::abs(values[t])) { + sum_data = sum_data < 0.0 ? 
std::numeric_limits::lowest() : + std::numeric_limits::max(); + break; + } + + sum_data += values[t]; + } + } + + // Copy value. + memcpy(&fragment_sums()[idx], &sum_data, sizeof(double)); +} + +template <> +void LoadedFragmentMetadata::compute_fragment_min_max_sum( + const std::string& name); + +void LoadedFragmentMetadata::compute_fragment_min_max_sum_null_count() { + std::vector names; + names.reserve(parent_fragment_.idx_map_.size()); + for (auto& it : parent_fragment_.idx_map_) { + names.emplace_back(it.first); + } + + // Process all attributes in parallel. + throw_if_not_ok(parallel_for( + &resources_->compute_tp(), + 0, + parent_fragment_.idx_map_.size(), + [&](uint64_t n) { + // For easy reference. + const auto& name = names[n]; + const auto& idx = parent_fragment_.idx_map_[name]; + const auto var_size = parent_fragment_.array_schema_->var_size(name); + const auto type = parent_fragment_.array_schema_->type(name); + + // Compute null count. + fragment_null_counts()[idx] = std::accumulate( + tile_null_counts()[idx].begin(), tile_null_counts()[idx].end(), uint64_t{0}); + + if (var_size) { + min_max_var(name); + } else { + // Switch depending on datatype. 
+ switch (type) { + case Datatype::INT8: + compute_fragment_min_max_sum(name); + break; + case Datatype::INT16: + compute_fragment_min_max_sum(name); + break; + case Datatype::INT32: + compute_fragment_min_max_sum(name); + break; + case Datatype::INT64: + compute_fragment_min_max_sum(name); + break; + case Datatype::BOOL: + case Datatype::UINT8: + compute_fragment_min_max_sum(name); + break; + case Datatype::UINT16: + compute_fragment_min_max_sum(name); + break; + case Datatype::UINT32: + compute_fragment_min_max_sum(name); + break; + case Datatype::UINT64: + compute_fragment_min_max_sum(name); + break; + case Datatype::FLOAT32: + compute_fragment_min_max_sum(name); + break; + case Datatype::FLOAT64: + compute_fragment_min_max_sum(name); + break; + case Datatype::DATETIME_YEAR: + case Datatype::DATETIME_MONTH: + case Datatype::DATETIME_WEEK: + case Datatype::DATETIME_DAY: + case Datatype::DATETIME_HR: + case Datatype::DATETIME_MIN: + case Datatype::DATETIME_SEC: + case Datatype::DATETIME_MS: + case Datatype::DATETIME_US: + case Datatype::DATETIME_NS: + case Datatype::DATETIME_PS: + case Datatype::DATETIME_FS: + case Datatype::DATETIME_AS: + case Datatype::TIME_HR: + case Datatype::TIME_MIN: + case Datatype::TIME_SEC: + case Datatype::TIME_MS: + case Datatype::TIME_US: + case Datatype::TIME_NS: + case Datatype::TIME_PS: + case Datatype::TIME_FS: + case Datatype::TIME_AS: + compute_fragment_min_max_sum(name); + break; + case Datatype::STRING_ASCII: + case Datatype::CHAR: + compute_fragment_min_max_sum(name); + break; + case Datatype::BLOB: + case Datatype::GEOM_WKB: + case Datatype::GEOM_WKT: + compute_fragment_min_max_sum(name); + break; + default: + break; + } + } + + return Status::Ok(); + })); +} + +void LoadedFragmentMetadata::min_max_var(const std::string& name) { + // For easy reference. 
+ const auto nullable = parent_fragment_.array_schema_->is_nullable(name); + const auto is_dim = parent_fragment_.array_schema_->is_dim(name); + const auto type = parent_fragment_.array_schema_->type(name); + const auto cell_val_num = parent_fragment_.array_schema_->cell_val_num(name); + const auto idx = parent_fragment_.idx_map_[name]; + + // Return if there's no min/max. + const auto has_min_max = TileMetadataGenerator::has_min_max_metadata( + type, is_dim, true, cell_val_num); + if (!has_min_max) + return; + + // Initialize to null. + void* min = nullptr; + void* max = nullptr; + uint64_t min_size = 0; + uint64_t max_size = 0; + + // Get data and tile num. + auto min_offsets = + static_cast(static_cast(tile_min_buffer()[idx].data())); + auto max_offsets = + static_cast(static_cast(tile_max_buffer()[idx].data())); + auto min_values = tile_min_var_buffer()[idx].data(); + auto max_values = tile_max_var_buffer()[idx].data(); + auto& null_count_values = tile_null_counts()[idx]; + auto tile_num = this->tile_num(); + + // Process tile by tile. + for (uint64_t t = 0; t < tile_num; t++) { + if (!nullable || null_count_values[t] != cell_num(t)) { + auto min_value = min_values + min_offsets[t]; + auto min_value_size = + t == tile_num - 1 ? + tile_min_var_buffer()[idx].size() - min_offsets[t] : + min_offsets[t + 1] - min_offsets[t]; + auto max_value = max_values + max_offsets[t]; + auto max_value_size = + t == tile_num - 1 ? + tile_max_var_buffer()[idx].size() - max_offsets[t] : + max_offsets[t + 1] - max_offsets[t]; + if (min == nullptr && max == nullptr) { + min = min_value; + min_size = min_value_size; + max = max_value; + max_size = max_value_size; + } else { + // Process min. 
+ size_t min_cmp_size = std::min(min_size, min_value_size); + int cmp = + strncmp(static_cast(min), min_value, min_cmp_size); + if (cmp != 0) { + if (cmp > 0) { + min = min_value; + min_size = min_value_size; + } + } else { + if (min_value_size < min_size) { + min = min_value; + min_size = min_value_size; + } + } + + // Process max. + size_t max_cmp_size = std::min(max_size, max_value_size); + cmp = strncmp(static_cast(max), max_value, max_cmp_size); + if (cmp != 0) { + if (cmp < 0) { + max = max_value; + max_size = max_value_size; + } + } else { + if (max_value_size > max_size) { + max = max_value; + max_size = max_value_size; + } + } + } + } + } + + // Copy values. + if (min != nullptr) { + fragment_mins()[idx].resize(min_size); + memcpy(fragment_mins()[idx].data(), min, min_size); + } + + if (max != nullptr) { + fragment_maxs()[idx].resize(max_size); + memcpy(fragment_maxs()[idx].data(), max, max_size); + } +} + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ +// Explicit template instantiations +template int8_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template uint8_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template int16_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template uint16_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template int32_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template uint32_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template int64_t LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template char LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template uint64_t 
LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template float LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template double LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template std::byte LoadedFragmentMetadata::get_tile_min_as( + const std::string& name, uint64_t tile_idx) const; +template int8_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template uint8_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template int16_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template uint16_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template int32_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template uint32_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template int64_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template uint64_t LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template float LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template double LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template std::byte LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; +template char LoadedFragmentMetadata::get_tile_max_as( + const std::string& name, uint64_t tile_idx) const; + } // namespace tiledb::sm diff --git a/tiledb/sm/fragment/loaded_fragment_metadata.h b/tiledb/sm/fragment/loaded_fragment_metadata.h index cd34362d73b..cd9ffffbf75 100644 --- a/tiledb/sm/fragment/loaded_fragment_metadata.h +++ 
b/tiledb/sm/fragment/loaded_fragment_metadata.h @@ -588,6 +588,94 @@ class LoadedFragmentMetadata { return processed_conditions_set_; } + /** + * Sets the input tile's MBR in the fragment metadata. + * + * @param tile The tile index whose MBR will be set. + * @param mbr The MBR to be set. + */ + void set_mbr(uint64_t tile, const NDRange& mbr); + + /** Returns the MBR of the input tile. */ + const NDRange& mbr(uint64_t tile_idx) const { + return rtree().leaf(tile_idx); + } + + /** Returns all the MBRs of all tiles in the fragment. */ + const tdb::pmr::vector& mbrs() const { + return rtree().leaves(); + } + + /** + * Retrieves the tile min value for a given attribute or dimension and tile + * index. + * + * @param name The input attribute/dimension. + * @param tile_idx The index of the tile in the metadata. + * @return Value. + */ + template + T get_tile_min_as(const std::string& name, uint64_t tile_idx) const; + + /** + * Returns the tile metadata for a tile. + * + * @param name Name of the attribute to get the data for. + * @param tile_idx Tile index. + */ + TileMetadata get_tile_metadata( + const std::string& name, const uint64_t tile_idx) const; + + /** + * Retrieves the tile max value for a given attribute or dimension and tile + * index. + * + * @tparam Type to return the data as. + * @param name The input attribute/dimension. + * @param tile_idx The index of the tile in the metadata. + * @return Value. + */ + template + T get_tile_max_as(const std::string& name, uint64_t tile_idx) const; + + /** + * Set the processed conditions. The processed conditions is the list + * of delete/update conditions that have already been applied for this + * fragment and don't need to be applied again. + * + * @param processed_conditions The processed conditions. + */ + void set_processed_conditions(std::vector& processed_conditions); + + /** + * Compute fragment min, max, sum, null count for all dimensions/attributes. 
+ */ + void compute_fragment_min_max_sum_null_count(); + + /** + * Compute the fragment min, max and sum values. + * + * @param name The attribute/dimension name. + */ + template + void compute_fragment_min_max_sum(const std::string& name); + + /** + * Compute the fragment sum value. + * + * @param idx The attribute/dimension index. + * @param nullable Is the attribute/dimension nullable. + */ + template + void compute_fragment_sum(const uint64_t idx, const bool nullable); + + /** + * Compute the fragment min and max values for var sized attributes. + * + * @param name The attribute/dimension name. + */ + void min_max_var(const std::string& name); + protected: /* ********************************* */ /* PRIVATE ATTRIBUTES */ diff --git a/tiledb/sm/query/legacy/reader.cc b/tiledb/sm/query/legacy/reader.cc index 78b21e07898..9adf71096f7 100644 --- a/tiledb/sm/query/legacy/reader.cc +++ b/tiledb/sm/query/legacy/reader.cc @@ -2122,7 +2122,8 @@ Status Reader::add_extra_offset() { bool Reader::sparse_tile_overwritten( unsigned frag_idx, uint64_t tile_idx) const { - const auto& mbr = fragment_metadata_[frag_idx]->mbr(tile_idx); + const auto& mbr = + fragment_metadata_[frag_idx]->loaded_metadata()->mbr(tile_idx); assert(!mbr.empty()); auto fragment_num = (unsigned)fragment_metadata_.size(); auto& domain{array_schema_.domain()}; diff --git a/tiledb/sm/query/readers/attribute_order_validator.h b/tiledb/sm/query/readers/attribute_order_validator.h index 77fb6b1e5aa..eef845da662 100644 --- a/tiledb/sm/query/readers/attribute_order_validator.h +++ b/tiledb/sm/query/readers/attribute_order_validator.h @@ -297,12 +297,14 @@ class AttributeOrderValidator { // Increasing data: The first value on f is the minimum on f. Check // that it is greater than the last (maximum) value in the proceeding // tile on f2. 
- auto value = fragment_metadata[f]->get_tile_min_as( - attribute_name_, 0); + auto value = fragment_metadata[f] + ->loaded_metadata() + ->get_tile_min_as(attribute_name_, 0); - auto value_previous = - fragment_metadata[f2]->get_tile_max_as( - attribute_name_, f2_tile_idx); + auto value_previous = fragment_metadata[f2] + ->loaded_metadata() + ->get_tile_max_as( + attribute_name_, f2_tile_idx); if (value_previous >= value) { throw AttributeOrderValidatorStatusException( @@ -312,12 +314,14 @@ class AttributeOrderValidator { // Decreasing data: The first value on f is the maximum of f. Check // that is is less than the last (minimum) value in the proceeding // tile on f2. - auto value = fragment_metadata[f]->get_tile_max_as( - attribute_name_, 0); + auto value = fragment_metadata[f] + ->loaded_metadata() + ->get_tile_max_as(attribute_name_, 0); - auto value_previous = - fragment_metadata[f2]->get_tile_min_as( - attribute_name_, f2_tile_idx); + auto value_previous = fragment_metadata[f2] + ->loaded_metadata() + ->get_tile_min_as( + attribute_name_, f2_tile_idx); if (value_previous <= value) { throw AttributeOrderValidatorStatusException( @@ -359,11 +363,14 @@ class AttributeOrderValidator { // Increasing data: The last value on f is the maximum on f. Check // that is less than the first (minimum) value on the following // tile in f2. - auto value = fragment_metadata[f]->get_tile_max_as( - attribute_name_, max_tile_idx); - auto value_next = - fragment_metadata[f2]->get_tile_min_as( - attribute_name_, f2_tile_idx); + auto value = fragment_metadata[f] + ->loaded_metadata() + ->get_tile_max_as( + attribute_name_, max_tile_idx); + auto value_next = fragment_metadata[f2] + ->loaded_metadata() + ->get_tile_min_as( + attribute_name_, f2_tile_idx); if (value_next <= value) { throw AttributeOrderValidatorStatusException( "Attribute out of order"); @@ -373,11 +380,14 @@ class AttributeOrderValidator { // Decreasinging data: The last value on f is the minimum on f. 
Check // that is greater than the first (maximum) value on the following // tile in f2. - auto value = fragment_metadata[f]->get_tile_min_as( - attribute_name_, max_tile_idx); - auto value_next = - fragment_metadata[f2]->get_tile_max_as( - attribute_name_, f2_tile_idx); + auto value = fragment_metadata[f] + ->loaded_metadata() + ->get_tile_min_as( + attribute_name_, max_tile_idx); + auto value_next = fragment_metadata[f2] + ->loaded_metadata() + ->get_tile_max_as( + attribute_name_, f2_tile_idx); if (value_next >= value) { throw AttributeOrderValidatorStatusException( "Attribute out of order"); @@ -422,8 +432,9 @@ class AttributeOrderValidator { if (!val_data.min_validated_) { // Get the min of the current fragment. - auto value = fragment_metadata[f]->get_tile_min_as( - attribute_name_, 0); + auto value = fragment_metadata[f] + ->loaded_metadata() + ->get_tile_min_as(attribute_name_, 0); // Get the previous value from the loaded tile. auto rt = min_tile_to_compare_against(f); @@ -454,8 +465,10 @@ class AttributeOrderValidator { if (!val_data.max_validated_) { // Get the min of the current fragment. auto max_tile_idx = fragment_metadata[f]->tile_num() - 1; - auto value = fragment_metadata[f]->get_tile_max_as( - attribute_name_, max_tile_idx); + auto value = + fragment_metadata[f] + ->loaded_metadata() + ->get_tile_max_as(attribute_name_, max_tile_idx); // Get the previous value from the loaded tile. 
auto rt = max_tile_to_compare_against(f); diff --git a/tiledb/sm/query/readers/dense_reader.cc b/tiledb/sm/query/readers/dense_reader.cc index c916e65c717..da12dd3489f 100644 --- a/tiledb/sm/query/readers/dense_reader.cc +++ b/tiledb/sm/query/readers/dense_reader.cc @@ -1472,7 +1472,8 @@ Status DenseReader::process_aggregates( auto& rt = result_space_tile.single_result_tile(); auto tile_idx = rt.tile_idx(); auto& frag_md = fragment_metadata_[rt.frag_idx()]; - auto md = frag_md->get_tile_metadata(name, tile_idx); + auto md = + frag_md->loaded_metadata()->get_tile_metadata(name, tile_idx); auto& aggregates = aggregates_[name]; for (auto& aggregate : aggregates) { aggregate->aggregate_tile_with_frag_md(md); diff --git a/tiledb/sm/query/readers/ordered_dim_label_reader.cc b/tiledb/sm/query/readers/ordered_dim_label_reader.cc index e26a3855d3e..0faa0d70d94 100644 --- a/tiledb/sm/query/readers/ordered_dim_label_reader.cc +++ b/tiledb/sm/query/readers/ordered_dim_label_reader.cc @@ -346,9 +346,11 @@ OrderedDimLabelReader::get_array_tile_indexes_for_range( if (increasing_labels_) { const auto min = - fragment_metadata_[f]->get_tile_min_as(label_name_, 0); - const auto max = fragment_metadata_[f]->get_tile_max_as( - label_name_, tile_num - 1); + fragment_metadata_[f]->loaded_metadata()->get_tile_min_as( + label_name_, 0); + const auto max = + fragment_metadata_[f]->loaded_metadata()->get_tile_max_as( + label_name_, tile_num - 1); if (start_range < min) { start_val_type = IndexValueType::LT; @@ -362,10 +364,12 @@ OrderedDimLabelReader::get_array_tile_indexes_for_range( end_val_type = IndexValueType::GT; } } else { - const auto min = fragment_metadata_[f]->get_tile_min_as( - label_name_, tile_num - 1); + const auto min = + fragment_metadata_[f]->loaded_metadata()->get_tile_min_as( + label_name_, tile_num - 1); const auto max = - fragment_metadata_[f]->get_tile_max_as(label_name_, 0); + fragment_metadata_[f]->loaded_metadata()->get_tile_max_as( + label_name_, 0); if 
(start_range > max) { start_val_type = IndexValueType::LT; } else if (start_range < min) { @@ -383,16 +387,20 @@ OrderedDimLabelReader::get_array_tile_indexes_for_range( if (start_val_type == IndexValueType::CONTAINED) { if (increasing_labels_) { for (; start_index < tile_num; start_index++) { - const auto max = fragment_metadata_[f]->get_tile_max_as( - label_name_, start_index); + const auto max = + fragment_metadata_[f] + ->loaded_metadata() + ->get_tile_max_as(label_name_, start_index); if (max >= start_range) { break; } } } else { for (;; start_index--) { - const auto max = fragment_metadata_[f]->get_tile_max_as( - label_name_, start_index); + const auto max = + fragment_metadata_[f] + ->loaded_metadata() + ->get_tile_max_as(label_name_, start_index); if (start_index == 0 || max >= start_range) { break; } @@ -404,16 +412,20 @@ OrderedDimLabelReader::get_array_tile_indexes_for_range( if (end_val_type == IndexValueType::CONTAINED) { if (increasing_labels_) { for (;; end_index--) { - const auto min = fragment_metadata_[f]->get_tile_min_as( - label_name_, end_index); + const auto min = + fragment_metadata_[f] + ->loaded_metadata() + ->get_tile_min_as(label_name_, end_index); if (end_index == 0 || min <= end_range) { break; } } } else { for (; end_index < tile_num; end_index++) { - const auto min = fragment_metadata_[f]->get_tile_min_as( - label_name_, end_index); + const auto min = + fragment_metadata_[f] + ->loaded_metadata() + ->get_tile_min_as(label_name_, end_index); if (min <= end_range) { break; } diff --git a/tiledb/sm/query/readers/sparse_global_order_reader.cc b/tiledb/sm/query/readers/sparse_global_order_reader.cc index de157a8bf88..069c30a8c4d 100644 --- a/tiledb/sm/query/readers/sparse_global_order_reader.cc +++ b/tiledb/sm/query/readers/sparse_global_order_reader.cc @@ -2130,8 +2130,9 @@ void SparseGlobalOrderReader::process_aggregates( if (can_aggregate_tile_with_frag_md(result_cell_slabs[i])) { if (range_thread_idx == 0) { auto rt = 
result_cell_slabs[i].tile_; - auto md = fragment_metadata_[rt->frag_idx()]->get_tile_metadata( - name, rt->tile_idx()); + auto md = fragment_metadata_[rt->frag_idx()] + ->loaded_metadata() + ->get_tile_metadata(name, rt->tile_idx()); for (auto& aggregate : aggregates) { aggregate->aggregate_tile_with_frag_md(md); } diff --git a/tiledb/sm/query/readers/sparse_index_reader_base.cc b/tiledb/sm/query/readers/sparse_index_reader_base.cc index 542b797688d..d8122ad4b70 100644 --- a/tiledb/sm/query/readers/sparse_index_reader_base.cc +++ b/tiledb/sm/query/readers/sparse_index_reader_base.cc @@ -632,7 +632,8 @@ void SparseIndexReaderBase::compute_tile_bitmaps( // Get the MBR for this tile. const auto& mbr = - fragment_metadata_[rt->frag_idx()]->mbr(rt->tile_idx()); + fragment_metadata_[rt->frag_idx()]->loaded_metadata()->mbr( + rt->tile_idx()); // Compute bitmaps one dimension at a time. for (unsigned d = 0; d < dim_num; d++) { diff --git a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc index e077fb6849d..4622a116a4f 100644 --- a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc +++ b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc @@ -1938,8 +1938,9 @@ void SparseUnorderedWithDupsReader::process_aggregates( if (can_aggregate_tile_with_frag_md(rt)) { if (range_thread_idx == 0) { auto t = rt->tile_idx(); - auto md = - fragment_metadata_[rt->frag_idx()]->get_tile_metadata(name, t); + auto md = fragment_metadata_[rt->frag_idx()] + ->loaded_metadata() + ->get_tile_metadata(name, t); for (auto& aggregate : aggregates) { aggregate->aggregate_tile_with_frag_md(md); } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index fccf6754494..2c7f6cffbf4 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -679,12 +679,13 @@ Status 
GlobalOrderWriter::finalize_global_write_state() { } // Set the processed conditions - meta->set_processed_conditions(processed_conditions_); + meta->loaded_metadata()->set_processed_conditions(processed_conditions_); // Compute fragment min/max/sum/null count and flush fragment metadata to // storage - meta->compute_fragment_min_max_sum_null_count(); - meta->store(array_->get_encryption_key()); + meta->loaded_metadata()->compute_fragment_min_max_sum_null_count(); + meta->store( + meta->loaded_metadata_shared(), array_->get_encryption_key()); // Add written fragment infos for (auto& frag_uri : frag_uris_to_commit_) { @@ -791,7 +792,19 @@ Status GlobalOrderWriter::global_write() { // Set new number of tiles in the fragment metadata auto new_num_tiles = frag_meta->tile_index_base() + num; - frag_meta->set_num_tiles(new_num_tiles); + frag_meta->set_num_tiles( + new_num_tiles, + frag_meta->loaded_metadata()->tile_offsets(), + frag_meta->loaded_metadata()->tile_var_offsets(), + frag_meta->loaded_metadata()->tile_var_sizes(), + frag_meta->loaded_metadata()->tile_validity_offsets(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_max_buffer(), + frag_meta->loaded_metadata()->tile_sums(), + frag_meta->loaded_metadata()->tile_null_counts()); + if (!frag_meta->dense()) { + frag_meta->loaded_metadata()->rtree().set_leaf_num(new_num_tiles); + } if (new_num_tiles == 0) { throw GlobalOrderWriterException( @@ -829,7 +842,19 @@ Status GlobalOrderWriter::global_write_handle_last_tile() { // Reserve space for the last tile in the fragment metadata auto meta = global_write_state_->frag_meta_; - meta->set_num_tiles(meta->tile_index_base() + 1); + meta->set_num_tiles( + meta->tile_index_base() + 1, + meta->loaded_metadata()->tile_offsets(), + meta->loaded_metadata()->tile_var_offsets(), + meta->loaded_metadata()->tile_var_sizes(), + meta->loaded_metadata()->tile_validity_offsets(), + meta->loaded_metadata()->tile_min_buffer(), +
meta->loaded_metadata()->tile_max_buffer(), + meta->loaded_metadata()->tile_sums(), + meta->loaded_metadata()->tile_null_counts()); + if (!meta->dense()) { + meta->loaded_metadata()->rtree().set_leaf_num(meta->tile_index_base() + 1); + } // Filter last tiles RETURN_CANCEL_OR_ERROR(filter_last_tiles(cell_num_last_tiles)); @@ -1430,13 +1455,14 @@ Status GlobalOrderWriter::start_new_fragment() { RETURN_NOT_OK(close_files(frag_meta)); // Set the processed conditions - frag_meta->set_processed_conditions(processed_conditions_); + frag_meta->loaded_metadata()->set_processed_conditions(processed_conditions_); // Compute fragment min/max/sum/null count - frag_meta->compute_fragment_min_max_sum_null_count(); + frag_meta->loaded_metadata()->compute_fragment_min_max_sum_null_count(); // Flush fragment metadata to storage - frag_meta->store(array_->get_encryption_key()); + frag_meta->store( + frag_meta->loaded_metadata_shared(), array_->get_encryption_key()); frag_uris_to_commit_.emplace_back(uri); diff --git a/tiledb/sm/query/writers/ordered_writer.cc b/tiledb/sm/query/writers/ordered_writer.cc index ed20b1f35d4..6930412b5ac 100644 --- a/tiledb/sm/query/writers/ordered_writer.cc +++ b/tiledb/sm/query/writers/ordered_writer.cc @@ -190,7 +190,19 @@ Status OrderedWriter::ordered_write() { auto tile_num = dense_tiler.tile_num(); // Set number of tiles in the fragment metadata - frag_meta->set_num_tiles(tile_num); + frag_meta->set_num_tiles( + tile_num, + frag_meta->loaded_metadata()->tile_offsets(), + frag_meta->loaded_metadata()->tile_var_offsets(), + frag_meta->loaded_metadata()->tile_var_sizes(), + frag_meta->loaded_metadata()->tile_validity_offsets(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_max_buffer(), + frag_meta->loaded_metadata()->tile_sums(), + frag_meta->loaded_metadata()->tile_null_counts()); + if (!frag_meta->dense()) { + frag_meta->loaded_metadata()->rtree().set_leaf_num(tile_num); + } // Prepare, filter and write 
tiles for all attributes auto attr_num = buffers_.size(); @@ -232,12 +244,27 @@ Status OrderedWriter::ordered_write() { if (has_min_max_metadata(attr, var_size) && array_schema_.var_size(attr)) { auto& attr_tile_batches = tiles.at(attr); - frag_meta->convert_tile_min_max_var_sizes_to_offsets(attr); + frag_meta->convert_tile_min_max_var_sizes_to_offsets( + attr, + frag_meta->loaded_metadata()->tile_min_var_buffer(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer(), + frag_meta->loaded_metadata()->tile_max_buffer()); for (auto& batch : attr_tile_batches) { uint64_t idx = 0; for (auto& tile : batch) { - frag_meta->set_tile_min_var(attr, idx, tile.min()); - frag_meta->set_tile_max_var(attr, idx, tile.max()); + frag_meta->set_tile_min_var( + attr, + idx, + tile.min(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_min_var_buffer()); + frag_meta->set_tile_max_var( + attr, + idx, + tile.max(), + frag_meta->loaded_metadata()->tile_max_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer()); idx++; } } @@ -251,15 +278,30 @@ Status OrderedWriter::ordered_write() { const auto var_size = array_schema_.var_size(attr); if (has_min_max_metadata(attr, var_size) && array_schema_.var_size(attr)) { - frag_meta->convert_tile_min_max_var_sizes_to_offsets(attr); + frag_meta->convert_tile_min_max_var_sizes_to_offsets( + attr, + frag_meta->loaded_metadata()->tile_min_var_buffer(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer(), + frag_meta->loaded_metadata()->tile_max_buffer()); RETURN_NOT_OK(parallel_for( compute_tp, 0, attr_tile_batches.size(), [&](uint64_t b) { const auto& attr = buff.first; auto& batch = tiles.at(attr)[b]; auto idx = b * thread_num; for (auto& tile : batch) { - frag_meta->set_tile_min_var(attr, idx, tile.min()); - frag_meta->set_tile_max_var(attr, idx, tile.max()); + frag_meta->set_tile_min_var( + attr, + 
idx, + tile.min(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_min_var_buffer()); + frag_meta->set_tile_max_var( + attr, + idx, + tile.max(), + frag_meta->loaded_metadata()->tile_max_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer()); idx++; } return Status::Ok(); @@ -269,8 +311,8 @@ Status OrderedWriter::ordered_write() { } // Compute fragment min/max/sum/null count and write the fragment metadata - frag_meta->compute_fragment_min_max_sum_null_count(); - frag_meta->store(array_->get_encryption_key()); + frag_meta->loaded_metadata()->compute_fragment_min_max_sum_null_count(); + frag_meta->store(frag_meta->loaded_metadata_shared(), array_->get_encryption_key()); // Add written fragment info RETURN_NOT_OK(add_written_fragment_info(frag_uri_.value())); diff --git a/tiledb/sm/query/writers/unordered_writer.cc b/tiledb/sm/query/writers/unordered_writer.cc index e30273c6eab..84e1c1e1500 100644 --- a/tiledb/sm/query/writers/unordered_writer.cc +++ b/tiledb/sm/query/writers/unordered_writer.cc @@ -687,7 +687,19 @@ Status UnorderedWriter::unordered_write() { auto tile_num = it->second.size(); if (is_coords_pass_) { // Set the number of tiles in the metadata - frag_meta_->set_num_tiles(tile_num); + frag_meta_->set_num_tiles( + tile_num, + frag_meta_->loaded_metadata()->tile_offsets(), + frag_meta_->loaded_metadata()->tile_var_offsets(), + frag_meta_->loaded_metadata()->tile_var_sizes(), + frag_meta_->loaded_metadata()->tile_validity_offsets(), + frag_meta_->loaded_metadata()->tile_min_buffer(), + frag_meta_->loaded_metadata()->tile_max_buffer(), + frag_meta_->loaded_metadata()->tile_sums(), + frag_meta_->loaded_metadata()->tile_null_counts()); + if (!frag_meta_->dense()) { + frag_meta_->loaded_metadata()->rtree().set_leaf_num(tile_num); + } stats_->add_counter("tile_num", tile_num); stats_->add_counter("cell_num", cell_pos_.size()); @@ -715,8 +727,9 @@ Status UnorderedWriter::unordered_write() { if (written_buffers_.size() >=
array_schema_.dim_num() + array_schema_.attribute_num()) { // Compute fragment min/max/sum/null count and write the fragment metadata - frag_meta_->compute_fragment_min_max_sum_null_count(); - frag_meta_->store(array_->get_encryption_key()); + frag_meta_->loaded_metadata()->compute_fragment_min_max_sum_null_count(); + frag_meta_->store( + frag_meta_->loaded_metadata_shared(), array_->get_encryption_key()); // Add written fragment info RETURN_NOT_OK(add_written_fragment_info(frag_uri_.value())); diff --git a/tiledb/sm/query/writers/writer_base.cc b/tiledb/sm/query/writers/writer_base.cc index b6ff5ffddfd..3a8bc973949 100644 --- a/tiledb/sm/query/writers/writer_base.cc +++ b/tiledb/sm/query/writers/writer_base.cc @@ -677,7 +677,9 @@ void WriterBase::set_coords_metadata( auto status = parallel_for( &resources_.compute_tp(), start_tile_idx, end_tile_idx, [&](uint64_t i) { - meta->set_mbr(i - start_tile_idx, mbrs[i]); + meta->loaded_metadata()->set_mbr( + meta->tile_index_base(), i - start_tile_idx, mbrs[i]); + meta->expand_non_empty_domain(mbrs[i]); return Status::Ok(); }); throw_if_not_ok(status); @@ -791,6 +793,8 @@ Status WriterBase::create_fragment( has_delete_metadata); frag_meta->init(subarray_.ndrange(0)); + frag_meta->loaded_metadata()->resize_offsets(frag_meta->num_dims_and_attrs()); + return Status::Ok(); } @@ -1051,13 +1055,26 @@ Status WriterBase::write_tiles( const auto var_size = array_schema_.var_size(attr); if (has_min_max_metadata(attr, var_size) && array_schema_.var_size(attr)) { - frag_meta->convert_tile_min_max_var_sizes_to_offsets(attr); + frag_meta->convert_tile_min_max_var_sizes_to_offsets( + attr, + frag_meta->loaded_metadata()->tile_min_var_buffer(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer(), + frag_meta->loaded_metadata()->tile_max_buffer()); for (uint64_t idx = start_tile_idx; idx < end_tile_idx; idx++) { frag_meta->set_tile_min_var( - attr, idx - start_tile_idx, tiles[idx].min());
+ attr, + idx - start_tile_idx, + tiles[idx].min(), + frag_meta->loaded_metadata()->tile_min_buffer(), + frag_meta->loaded_metadata()->tile_min_var_buffer()); frag_meta->set_tile_max_var( - attr, idx - start_tile_idx, tiles[idx].max()); + attr, + idx - start_tile_idx, + tiles[idx].max(), + frag_meta->loaded_metadata()->tile_max_buffer(), + frag_meta->loaded_metadata()->tile_max_var_buffer()); } } return Status::Ok(); @@ -1112,7 +1129,11 @@ Status WriterBase::write_tiles( t.filtered_buffer().data(), t.filtered_buffer().size(), remote_global_order_write)); - frag_meta->set_tile_offset(name, tile_id, t.filtered_buffer().size()); + frag_meta->set_tile_offset( + name, + tile_id, + t.filtered_buffer().size(), + frag_meta->loaded_metadata()->tile_offsets()); auto null_count = tile.null_count(); if (var_size) { @@ -1123,20 +1144,43 @@ Status WriterBase::write_tiles( t_var.filtered_buffer().size(), remote_global_order_write)); frag_meta->set_tile_var_offset( - name, tile_id, t_var.filtered_buffer().size()); - frag_meta->set_tile_var_size(name, tile_id, tile.var_pre_filtered_size()); + name, + tile_id, + t_var.filtered_buffer().size(), + frag_meta->loaded_metadata()->tile_var_offsets()); + frag_meta->set_tile_var_size( + name, + tile_id, + tile.var_pre_filtered_size(), + frag_meta->loaded_metadata()->tile_var_sizes()); if (has_min_max_md && null_count != frag_meta->cell_num(tile_id)) { - frag_meta->set_tile_min_var_size(name, tile_id, tile.min().size()); - frag_meta->set_tile_max_var_size(name, tile_id, tile.max().size()); + frag_meta->set_tile_min_var_size( + name, + tile_id, + tile.min().size(), + frag_meta->loaded_metadata()->tile_min_buffer()); + frag_meta->set_tile_max_var_size( + name, + tile_id, + tile.max().size(), + frag_meta->loaded_metadata()->tile_max_buffer()); } } else { if (has_min_max_md && null_count != frag_meta->cell_num(tile_id)) { - frag_meta->set_tile_min(name, tile_id, tile.min()); + frag_meta->set_tile_min( + name, + tile_id, + tile.min(), + 
frag_meta->loaded_metadata()->tile_min_buffer()); frag_meta->set_tile_max(name, tile_id, tile.max()); } if (has_sum_md) { - frag_meta->set_tile_sum(name, tile_id, tile.sum()); + frag_meta->set_tile_sum( + name, + tile_id, + tile.sum(), + frag_meta->loaded_metadata()->tile_sums()); } } @@ -1148,8 +1192,15 @@ Status WriterBase::write_tiles( t_val.filtered_buffer().size(), remote_global_order_write)); frag_meta->set_tile_validity_offset( - name, tile_id, t_val.filtered_buffer().size()); - frag_meta->set_tile_null_count(name, tile_id, null_count); + name, + tile_id, + t_val.filtered_buffer().size(), + frag_meta->loaded_metadata()->tile_validity_offsets()); + frag_meta->set_tile_null_count( + name, + tile_id, + null_count, + frag_meta->loaded_metadata()->tile_null_counts()); } } diff --git a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index 9551b0c4c64..c9ed7684e86 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -228,7 +228,7 @@ void InfoCommand::write_svg_mbrs() const { max_y = std::numeric_limits::min(); auto fragment_metadata = array.fragment_metadata(); for (const auto& f : fragment_metadata) { - const auto& mbrs = f->mbrs(); + const auto& mbrs = f->loaded_metadata()->mbrs(); for (const auto& mbr : mbrs) { auto tup = get_mbr(mbr, schema.domain()); min_x = std::min(min_x, std::get<0>(tup)); @@ -296,7 +296,7 @@ void InfoCommand::write_text_mbrs() const { std::stringstream text; for (const auto& f : fragment_metadata) { f->loaded_metadata()->load_rtree(*encryption_key); - const auto& mbrs = f->mbrs(); + const auto& mbrs = f->loaded_metadata()->mbrs(); for (const auto& mbr : mbrs) { auto str_mbr = mbr_to_string(mbr, schema.domain()); for (unsigned i = 0; i < dim_num; i++) {