diff --git a/tiledb/CMakeLists.txt b/tiledb/CMakeLists.txt index 21807023f7a8..e9e1574bca54 100644 --- a/tiledb/CMakeLists.txt +++ b/tiledb/CMakeLists.txt @@ -216,6 +216,7 @@ set(TILEDB_CORE_SOURCES ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_identifier.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_info.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_metadata.cc + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/ondemand_metadata.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/global_state/global_state.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/global_state/signal_handlers.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/global_state/watchdog.cc @@ -1037,4 +1038,4 @@ configure_file( install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tiledb.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -include(${CMAKE_SOURCE_DIR}/cmake/package.cmake) \ No newline at end of file +include(${CMAKE_SOURCE_DIR}/cmake/package.cmake) diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index fa2f2691911c..808b3ba37d90 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -66,13 +66,6 @@ using namespace tiledb::type; namespace tiledb::sm { -class FragmentMetadataStatusException : public StatusException { - public: - explicit FragmentMetadataStatusException(const std::string& message) - : StatusException("FragmentMetadata", message) { - } -}; - /* ****************************** */ /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ @@ -82,7 +75,6 @@ FragmentMetadata::FragmentMetadata( : resources_(resources) , memory_tracker_(memory_tracker) , rtree_(RTree(nullptr, constants::rtree_fanout, memory_tracker_)) - , tile_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_var_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_var_sizes_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_validity_offsets_( @@ -95,7 +87,8 @@ 
FragmentMetadata::FragmentMetadata( memory_tracker_->get_resource(MemoryType::TILE_MAX_VALS)) , tile_sums_(memory_tracker_->get_resource(MemoryType::TILE_SUMS)) , tile_null_counts_( - memory_tracker_->get_resource(MemoryType::TILE_NULL_COUNTS)) { + memory_tracker_->get_resource(MemoryType::TILE_NULL_COUNTS)) + , ondemand_metadata_(*this, memory_tracker) { } FragmentMetadata::FragmentMetadata( @@ -123,7 +116,6 @@ FragmentMetadata::FragmentMetadata( , rtree_(RTree( &array_schema_->domain(), constants::rtree_fanout, memory_tracker_)) , tile_index_base_(0) - , tile_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_var_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_var_sizes_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) , tile_validity_offsets_( @@ -139,7 +131,8 @@ FragmentMetadata::FragmentMetadata( memory_tracker_->get_resource(MemoryType::TILE_NULL_COUNTS)) , version_(array_schema_->write_version()) , timestamp_range_(timestamp_range) - , array_uri_(array_schema_->array_uri()) { + , array_uri_(array_schema_->array_uri()) + , ondemand_metadata_(*this, memory_tracker) { build_idx_map(); array_schema_name_ = array_schema_->name(); } @@ -167,8 +160,8 @@ void FragmentMetadata::set_tile_offset( assert(it != idx_map_.end()); auto idx = it->second; tid += tile_index_base_; - assert(tid < tile_offsets_[idx].size()); - tile_offsets_[idx][tid] = file_sizes_[idx]; + assert(tid < ondemand_metadata_.tile_offsets()[idx].size()); + ondemand_metadata_.tile_offsets()[idx][tid] = file_sizes_[idx]; file_sizes_[idx] += step; } @@ -726,8 +719,7 @@ void FragmentMetadata::init(const NDRange& non_empty_domain) { last_tile_cell_num_ = 0; // Initialize tile offsets - tile_offsets_.resize(num); - tile_offsets_mtx_.resize(num); + ondemand_metadata_.resize_tile_offsets_vectors(num); file_sizes_.resize(num); for (unsigned int i = 0; i < num; ++i) file_sizes_[i] = 0; @@ -1210,7 +1202,7 @@ void FragmentMetadata::store_v15_or_higher( void 
FragmentMetadata::set_num_tiles(uint64_t num_tiles) { for (auto& it : idx_map_) { auto i = it.second; - assert(num_tiles >= tile_offsets_[i].size()); + assert(num_tiles >= ondemand_metadata_.tile_offsets()[i].size()); // Get the fixed cell size const auto is_dim = array_schema_->is_dim(it.first); @@ -1218,7 +1210,7 @@ void FragmentMetadata::set_num_tiles(uint64_t num_tiles) { const auto cell_size = var_size ? constants::cell_var_offset_size : array_schema_->cell_size(it.first); - tile_offsets_[i].resize(num_tiles, 0); + ondemand_metadata_.tile_offsets()[i].resize(num_tiles, 0); tile_var_offsets_[i].resize(num_tiles, 0); tile_var_sizes_[i].resize(num_tiles, 0); tile_validity_offsets_[i].resize(num_tiles, 0); @@ -1373,42 +1365,6 @@ const std::string& FragmentMetadata::array_schema_name() { return array_schema_name_; } -void FragmentMetadata::load_tile_offsets( - const EncryptionKey& encryption_key, std::vector& names) { - // Sort 'names' in ascending order of their index. The - // motivation is to load the offsets in order of their - // layout for sequential reads to the file. - std::sort( - names.begin(), - names.end(), - [&](const std::string& lhs, const std::string& rhs) { - assert(idx_map_.count(lhs) > 0); - assert(idx_map_.count(rhs) > 0); - return idx_map_[lhs] < idx_map_[rhs]; - }); - - // The fixed offsets are located before the - // var offsets. Load all of the fixed offsets - // first. - for (const auto& name : names) { - load_tile_offsets(encryption_key, idx_map_[name]); - } - - // Load all of the var offsets. - for (const auto& name : names) { - if (array_schema_->var_size(name)) { - load_tile_var_offsets(encryption_key, idx_map_[name]); - } - } - - // Load all of the var offsets. 
- for (const auto& name : names) { - if (array_schema_->is_nullable(name)) { - load_tile_validity_offsets(encryption_key, idx_map_[name]); - } - } -} - void FragmentMetadata::load_tile_min_values( const EncryptionKey& encryption_key, std::vector& names) { // Sort 'names' in ascending order of their index. The @@ -1535,19 +1491,6 @@ void FragmentMetadata::load_processed_conditions( loaded_metadata_.processed_conditions_ = true; } -uint64_t FragmentMetadata::file_offset( - const std::string& name, uint64_t tile_idx) const { - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_.tile_offsets_[idx]) { - throw std::logic_error( - "Trying to access tile offsets metadata that's not loaded"); - } - - return tile_offsets_[idx][tile_idx]; -} - uint64_t FragmentMetadata::file_var_offset( const std::string& name, uint64_t tile_idx) const { auto it = idx_map_.find(name); @@ -1582,25 +1525,6 @@ const tdb::pmr::vector& FragmentMetadata::mbrs() const { return rtree_.leaves(); } -uint64_t FragmentMetadata::persisted_tile_size( - const std::string& name, uint64_t tile_idx) const { - auto it = idx_map_.find(name); - assert(it != idx_map_.end()); - auto idx = it->second; - if (!loaded_metadata_.tile_offsets_[idx]) { - throw std::logic_error( - "Trying to access persisted tile offsets metadata that's not present"); - } - - auto tile_num = this->tile_num(); - - auto tile_size = - (tile_idx != tile_num - 1) ? 
- tile_offsets_[idx][tile_idx + 1] - tile_offsets_[idx][tile_idx] : - file_sizes_[idx] - tile_offsets_[idx][tile_idx]; - return tile_size; -} - uint64_t FragmentMetadata::persisted_tile_var_size( const std::string& name, uint64_t tile_idx) const { auto it = idx_map_.find(name); @@ -2106,61 +2030,6 @@ void FragmentMetadata::free_rtree() { loaded_metadata_.rtree_ = false; } -void FragmentMetadata::free_tile_offsets() { - for (uint64_t i = 0; i < tile_offsets_.size(); i++) { - std::lock_guard lock(tile_offsets_mtx_[i]); - if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory( - tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); - } - tile_offsets_[i].clear(); - loaded_metadata_.tile_offsets_[i] = false; - } - - for (uint64_t i = 0; i < tile_var_offsets_.size(); i++) { - std::lock_guard lock(tile_var_offsets_mtx_[i]); - if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory( - tile_var_offsets_[i].size() * sizeof(uint64_t), - MemoryType::TILE_OFFSETS); - } - tile_var_offsets_[i].clear(); - loaded_metadata_.tile_var_offsets_[i] = false; - } - - for (uint64_t i = 0; i < tile_offsets_.size(); i++) { - std::lock_guard lock(tile_offsets_mtx_[i]); - if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory( - tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); - } - tile_offsets_[i].clear(); - loaded_metadata_.tile_offsets_[i] = false; - } - - for (uint64_t i = 0; i < tile_validity_offsets_.size(); i++) { - std::lock_guard lock(mtx_); - if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory( - tile_validity_offsets_[i].size() * sizeof(uint64_t), - MemoryType::TILE_OFFSETS); - } - tile_validity_offsets_[i].clear(); - loaded_metadata_.tile_validity_offsets_[i] = false; - } - - for (uint64_t i = 0; i < tile_var_sizes_.size(); i++) { - std::lock_guard lock(mtx_); - if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory( - tile_var_sizes_[i].size() * sizeof(uint64_t), - 
MemoryType::TILE_OFFSETS); - } - tile_var_sizes_[i].clear(); - loaded_metadata_.tile_var_sizes_[i] = false; - } -} - void FragmentMetadata::load_tile_var_sizes( const EncryptionKey& encryption_key, const std::string& name) { if (version_ <= 2) { @@ -2455,33 +2324,6 @@ void FragmentMetadata::expand_non_empty_domain(const NDRange& mbr) { array_schema_->domain().expand_ndrange(mbr, &non_empty_domain_); } -void FragmentMetadata::load_tile_offsets( - const EncryptionKey& encryption_key, unsigned idx) { - if (version_ <= 2) { - return; - } - - // If the tile offset is already loaded, exit early to avoid the lock - if (loaded_metadata_.tile_offsets_[idx]) { - return; - } - - std::lock_guard lock(tile_offsets_mtx_[idx]); - - if (loaded_metadata_.tile_offsets_[idx]) { - return; - } - - auto tile = read_generic_tile_from_file( - encryption_key, gt_offsets_.tile_offsets_[idx]); - resources_->stats().add_counter("read_tile_offsets_size", tile->size()); - - Deserializer deserializer(tile->data(), tile->size()); - load_tile_offsets(idx, deserializer); - - loaded_metadata_.tile_offsets_[idx] = true; -} - void FragmentMetadata::load_tile_var_offsets( const EncryptionKey& encryption_key, unsigned idx) { if (version_ <= 2) { @@ -2901,68 +2743,6 @@ void FragmentMetadata::load_non_empty_domain_v5_or_higher( } } -// Applicable only to versions 1 and 2 -void FragmentMetadata::load_tile_offsets(Deserializer& deserializer) { - uint64_t tile_offsets_num = 0; - unsigned int attribute_num = array_schema_->attribute_num(); - - // Allocate tile offsets - tile_offsets_.resize(attribute_num + 1); - tile_offsets_mtx_.resize(attribute_num + 1); - - // For all attributes, get the tile offsets - for (unsigned int i = 0; i < attribute_num + 1; ++i) { - // Get number of tile offsets - tile_offsets_num = deserializer.read(); - - if (tile_offsets_num == 0) - continue; - - auto size = tile_offsets_num * sizeof(uint64_t); - if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory(size, 
MemoryType::TILE_OFFSETS)) { - throw FragmentMetadataStatusException( - "Cannot load tile offsets; Insufficient memory budget; Needed " + - std::to_string(size) + " but only had " + - std::to_string(memory_tracker_->get_memory_available()) + - " from budget " + - std::to_string(memory_tracker_->get_memory_budget())); - } - - // Get tile offsets - tile_offsets_[i].resize(tile_offsets_num); - deserializer.read(&tile_offsets_[i][0], size); - } - - loaded_metadata_.tile_offsets_.resize( - array_schema_->attribute_num() + 1, true); -} - -void FragmentMetadata::load_tile_offsets( - unsigned idx, Deserializer& deserializer) { - uint64_t tile_offsets_num = 0; - - // Get number of tile offsets - tile_offsets_num = deserializer.read(); - - // Get tile offsets - if (tile_offsets_num != 0) { - auto size = tile_offsets_num * sizeof(uint64_t); - if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { - throw FragmentMetadataStatusException( - "Cannot load tile offsets; Insufficient memory budget; Needed " + - std::to_string(size) + " but only had " + - std::to_string(memory_tracker_->get_memory_available()) + - " from budget " + - std::to_string(memory_tracker_->get_memory_budget())); - } - - tile_offsets_[idx].resize(tile_offsets_num); - deserializer.read(&tile_offsets_[idx][0], size); - } -} - // ===== FORMAT ===== // tile_var_offsets_attr#0_num (uint64_t) // tile_var_offsets_attr#0_#1 (uint64_t) tile_var_offsets_attr#0_#2 @@ -3628,7 +3408,7 @@ void FragmentMetadata::load_v1_v2( load_non_empty_domain(deserializer); load_mbrs(deserializer); load_bounding_coords(deserializer); - load_tile_offsets(deserializer); + ondemand_metadata().load_tile_offsets(deserializer); load_tile_var_offsets(deserializer); load_tile_var_sizes(deserializer); load_last_tile_cell_num(deserializer); @@ -3721,8 +3501,7 @@ void FragmentMetadata::load_footer( has_delete_meta_ * 2; num += (version_ >= 5) ? 
array_schema_->dim_num() : 0; - tile_offsets_.resize(num); - tile_offsets_mtx_.resize(num); + ondemand_metadata_.resize_tile_offsets_vectors(num); tile_var_offsets_.resize(num); tile_var_offsets_mtx_.resize(num); tile_var_sizes_.resize(num); @@ -4025,13 +3804,14 @@ void FragmentMetadata::store_tile_offsets( void FragmentMetadata::write_tile_offsets( unsigned idx, Serializer& serializer) { // Write number of tile offsets - uint64_t tile_offsets_num = tile_offsets_[idx].size(); + uint64_t tile_offsets_num = ondemand_metadata_.tile_offsets()[idx].size(); serializer.write(tile_offsets_num); // Write tile offsets if (tile_offsets_num != 0) { serializer.write( - &tile_offsets_[idx][0], tile_offsets_num * sizeof(uint64_t)); + &ondemand_metadata_.tile_offsets()[idx][0], + tile_offsets_num * sizeof(uint64_t)); } } @@ -4642,11 +4422,6 @@ void FragmentMetadata::store_footer(const EncryptionKey&) { resources_->stats().add_counter("write_frag_meta_footer_size", tile->size()); } -void FragmentMetadata::resize_tile_offsets_vectors(uint64_t size) { - tile_offsets_mtx().resize(size); - tile_offsets().resize(size); -} - void FragmentMetadata::resize_tile_var_offsets_vectors(uint64_t size) { tile_var_offsets_mtx().resize(size); tile_var_offsets().resize(size); diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index f23dd1950623..c0917cd9fdfd 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -39,6 +39,7 @@ #include #include +#include "ondemand_metadata.h" #include "tiledb/common/common.h" #include "tiledb/common/pmr.h" #include "tiledb/sm/array_schema/array_schema.h" @@ -63,6 +64,13 @@ class EncryptionKey; class TileMetadata; class MemoryTracker; +class FragmentMetadataStatusException : public StatusException { + public: + explicit FragmentMetadataStatusException(const std::string& message) + : StatusException("FragmentMetadata", message) { + } +}; + /** Stores the metadata structures of a 
fragment. */ class FragmentMetadata { public: @@ -131,7 +139,6 @@ class FragmentMetadata { bool fragment_min_max_sum_null_count_ = false; bool processed_conditions_ = false; }; - /** * Stores the start offsets of the generic tiles stored in the * metadata file, each separately storing the various metadata @@ -326,12 +333,6 @@ class FragmentMetadata { return tile_index_base_; } - /** Returns the tile offsets. */ - inline const tdb::pmr::vector>& tile_offsets() - const { - return tile_offsets_; - } - /** Returns the variable tile offsets. */ inline const tdb::pmr::vector>& tile_var_offsets() const { @@ -746,17 +747,6 @@ class FragmentMetadata { /** Return the array schema name. */ const std::string& array_schema_name(); - /** - * Retrieves the starting offset of the input tile of the input attribute - * or dimension in the file. If the attribute/dimension is var-sized, it - * returns the starting offset of the offsets tile. - * - * @param name The input attribute/dimension. - * @param tile_idx The index of the tile in the metadata. - * @return The file offset to be retrieved. - */ - uint64_t file_offset(const std::string& name, uint64_t tile_idx) const; - /** * Retrieves the starting offset of the input tile of input attribute or * dimension in the file. The attribute/dimension must be var-sized. @@ -786,19 +776,6 @@ class FragmentMetadata { /** Returns all the MBRs of all tiles in the fragment. */ const tdb::pmr::vector& mbrs() const; - /** - * Retrieves the size of the tile when it is persisted (e.g. the size of the - * compressed tile on disk) for a given attribute or dimension and tile index. - * If the attribute/dimension is var-sized, this will return the persisted - * size of the offsets tile. - * - * @param name The input attribute/dimension. - * @param tile_idx The index of the tile in the metadata. - * @return Size. 
- */ - uint64_t persisted_tile_size( - const std::string& name, uint64_t tile_idx) const; - /** * Retrieves the size of the tile when it is persisted (e.g. the size of the * compressed tile on disk) for a given var-sized attribute or dimension @@ -973,9 +950,6 @@ class FragmentMetadata { /** Frees the memory associated with the rtree. */ void free_rtree(); - /** Frees the memory associated with tile_offsets. */ - void free_tile_offsets(); - /** * Loads the variable tile sizes for the input attribute or dimension idx * from storage. @@ -983,15 +957,6 @@ class FragmentMetadata { void load_tile_var_sizes( const EncryptionKey& encryption_key, const std::string& name); - /** - * Loads tile offsets for the attribute/dimension names. - * - * @param encryption_key The key the array got opened with. - * @param names The attribute/dimension names. - */ - void load_tile_offsets( - const EncryptionKey& encryption_key, std::vector& names); - /** * Loads min values for the attribute names. * @@ -1120,16 +1085,6 @@ class FragmentMetadata { return tile_index_base_; } - /** tile_offsets accessor */ - tdb::pmr::vector>& tile_offsets() { - return tile_offsets_; - } - - /** tile_offsets_mtx accessor */ - std::deque& tile_offsets_mtx() { - return tile_offsets_mtx_; - } - /** tile_var_offsets accessor */ tdb::pmr::vector>& tile_var_offsets() { return tile_var_offsets_; @@ -1245,11 +1200,6 @@ class FragmentMetadata { loaded_metadata_ = loaded_metadata; } - /** - * Resize tile offsets related vectors. - */ - void resize_tile_offsets_vectors(uint64_t size); - /** * Resize tile var offsets related vectors. 
*/ @@ -1265,6 +1215,14 @@ class FragmentMetadata { */ void resize_tile_validity_offsets_vectors(uint64_t size); + inline OndemandMetadata& ondemand_metadata() { + return ondemand_metadata_; + } + + inline const OndemandMetadata& ondemand_metadata() const { + return ondemand_metadata_; + } + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ @@ -1347,9 +1305,6 @@ class FragmentMetadata { /** Local mutex for thread-safety. */ std::mutex mtx_; - /** Mutex per tile offset loading. */ - std::deque tile_offsets_mtx_; - /** Mutex per tile var offset loading. */ std::deque tile_var_offsets_mtx_; @@ -1365,12 +1320,6 @@ class FragmentMetadata { */ uint64_t tile_index_base_; - /** - * The tile offsets in their corresponding attribute files. Meaningful only - * when there is compression. - */ - tdb::pmr::vector> tile_offsets_; - /** * The variable tile offsets in their corresponding attribute files. * Meaningful only for variable-sized tiles. @@ -1462,6 +1411,8 @@ class FragmentMetadata { */ std::vector processed_conditions_; + OndemandMetadata ondemand_metadata_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ @@ -1520,12 +1471,6 @@ class FragmentMetadata { */ void expand_non_empty_domain(const NDRange& mbr); - /** - * Loads the tile offsets for the input attribute or dimension idx - * from storage. - */ - void load_tile_offsets(const EncryptionKey& encryption_key, unsigned idx); - /** * Loads the variable tile offsets for the input attribute or dimension idx * from storage. @@ -1701,18 +1646,6 @@ class FragmentMetadata { */ void load_non_empty_domain_v5_or_higher(Deserializer& deserializer); - /** - * Loads the tile offsets for the input attribute from the input buffer. - * Applicable to versions 1 and 2 - */ - void load_tile_offsets(Deserializer& deserializer); - - /** - * Loads the tile offsets for the input attribute or dimension from the - * input buffer. 
- */ - void load_tile_offsets(unsigned idx, Deserializer& deserializer); - /** * Loads the variable tile offsets from the input buffer. * Applicable to versions 1 and 2 @@ -2090,6 +2023,8 @@ class FragmentMetadata { * This builds the index mapping for attribute/dimension name to id. */ void build_idx_map(); + + friend class OndemandMetadata; }; } // namespace sm diff --git a/tiledb/sm/fragment/ondemand_metadata.cc b/tiledb/sm/fragment/ondemand_metadata.cc new file mode 100644 index 000000000000..f448c8306cc0 --- /dev/null +++ b/tiledb/sm/fragment/ondemand_metadata.cc @@ -0,0 +1,308 @@ +/** + * @file ondemand_metadata.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements the OndemandMetadata class. 
+ */ + +#include "tiledb/common/common.h" + +#include +#include +#include +#include +#include "tiledb/common/heap_memory.h" +#include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" +#include "tiledb/sm/array_schema/array_schema.h" +#include "tiledb/sm/array_schema/attribute.h" +#include "tiledb/sm/array_schema/dimension.h" +#include "tiledb/sm/array_schema/domain.h" +#include "tiledb/sm/buffer/buffer.h" +#include "tiledb/sm/filesystem/vfs.h" +#include "tiledb/sm/fragment/fragment_identifier.h" +#include "tiledb/sm/fragment/fragment_metadata.h" +#include "tiledb/sm/fragment/ondemand_metadata.h" +#include "tiledb/sm/misc/constants.h" +#include "tiledb/sm/misc/parallel_functions.h" +#include "tiledb/sm/misc/utils.h" +#include "tiledb/sm/query/readers/aggregators/tile_metadata.h" +#include "tiledb/sm/stats/global_stats.h" +#include "tiledb/sm/storage_manager/storage_manager.h" +#include "tiledb/sm/tile/generic_tile_io.h" +#include "tiledb/sm/tile/tile.h" +#include "tiledb/sm/tile/tile_metadata_generator.h" +#include "tiledb/storage_format/serialization/serializers.h" +#include "tiledb/type/range/range.h" + +using namespace tiledb::common; +using namespace tiledb::type; + +namespace tiledb::sm { + +/* ********************************* */ +/* CONSTRUCTORS & DESTRUCTORS */ +/* ********************************* */ + +OndemandMetadata::OndemandMetadata( + FragmentMetadata& parent, shared_ptr memory_tracker) + : parent_fragment_(parent) + , memory_tracker_(memory_tracker) + , tile_offsets_(memory_tracker->get_resource(MemoryType::TILE_OFFSETS)) { +} + +/* ********************************* */ +/* API */ +/* ********************************* */ + +uint64_t OndemandMetadata::persisted_tile_size( + const std::string& name, uint64_t tile_idx) const { + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!parent_fragment_.loaded_metadata_.tile_offsets_[idx]) { + throw std::logic_error( 
+ "Trying to access persisted tile offsets metadata that's not present"); + } + + auto tile_num = parent_fragment_.tile_num(); + + auto tile_size = + (tile_idx != tile_num - 1) ? + tile_offsets_[idx][tile_idx + 1] - tile_offsets_[idx][tile_idx] : + parent_fragment_.file_sizes_[idx] - tile_offsets_[idx][tile_idx]; + return tile_size; +} + +void OndemandMetadata::load_tile_offsets( + const EncryptionKey& encryption_key, unsigned idx) { + if (parent_fragment_.version_ <= 2) { + return; + } + + // If the tile offset is already loaded, exit early to avoid the lock + if (parent_fragment_.loaded_metadata_.tile_offsets_[idx]) { + return; + } + + std::lock_guard lock(tile_offsets_mtx_[idx]); + + if (parent_fragment_.loaded_metadata_.tile_offsets_[idx]) { + return; + } + + auto tile = parent_fragment_.read_generic_tile_from_file( + encryption_key, parent_fragment_.gt_offsets_.tile_offsets_[idx]); + parent_fragment_.resources_->stats().add_counter( + "read_tile_offsets_size", tile->size()); + + Deserializer deserializer(tile->data(), tile->size()); + load_tile_offsets(idx, deserializer); + + parent_fragment_.loaded_metadata_.tile_offsets_[idx] = true; +} + +void OndemandMetadata::load_tile_offsets( + unsigned idx, Deserializer& deserializer) { + uint64_t tile_offsets_num = 0; + + // Get number of tile offsets + tile_offsets_num = deserializer.read(); + + // Get tile offsets + if (tile_offsets_num != 0) { + auto size = tile_offsets_num * sizeof(uint64_t); + if (memory_tracker_ != nullptr && + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { + throw FragmentMetadataStatusException( + "Cannot load tile offsets; Insufficient memory budget; Needed " + + std::to_string(size) + " but only had " + + std::to_string(memory_tracker_->get_memory_available()) + + " from budget " + + std::to_string(memory_tracker_->get_memory_budget())); + } + + tile_offsets_[idx].resize(tile_offsets_num); + deserializer.read(&tile_offsets_[idx][0], size); + } +} + +void 
OndemandMetadata::load_tile_offsets( + const EncryptionKey& encryption_key, std::vector& names) { + // Sort 'names' in ascending order of their index. The + // motivation is to load the offsets in order of their + // layout for sequential reads to the file. + std::sort( + names.begin(), + names.end(), + [&](const std::string& lhs, const std::string& rhs) { + assert(parent_fragment_.idx_map_.count(lhs) > 0); + assert(parent_fragment_.idx_map_.count(rhs) > 0); + return parent_fragment_.idx_map_[lhs] < parent_fragment_.idx_map_[rhs]; + }); + + // The fixed offsets are located before the + // var offsets. Load all of the fixed offsets + // first. + for (const auto& name : names) { + load_tile_offsets(encryption_key, parent_fragment_.idx_map_[name]); + } + + // Load all of the var offsets. + for (const auto& name : names) { + if (parent_fragment_.array_schema_->var_size(name)) { + parent_fragment_.load_tile_var_offsets( + encryption_key, parent_fragment_.idx_map_[name]); + } + } + + // Load all of the validity offsets. 
+ for (const auto& name : names) { + if (parent_fragment_.array_schema_->is_nullable(name)) { + parent_fragment_.load_tile_validity_offsets( + encryption_key, parent_fragment_.idx_map_[name]); + } + } +} + +// Applicable only to versions 1 and 2 +void OndemandMetadata::load_tile_offsets(Deserializer& deserializer) { + uint64_t tile_offsets_num = 0; + unsigned int attribute_num = parent_fragment_.array_schema_->attribute_num(); + + // Allocate tile offsets + tile_offsets_.resize(attribute_num + 1); + tile_offsets_mtx_.resize(attribute_num + 1); + + // For all attributes, get the tile offsets + for (unsigned int i = 0; i < attribute_num + 1; ++i) { + // Get number of tile offsets + tile_offsets_num = deserializer.read(); + + if (tile_offsets_num == 0) + continue; + + auto size = tile_offsets_num * sizeof(uint64_t); + if (memory_tracker_ != nullptr && + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { + throw FragmentMetadataStatusException( + "Cannot load tile offsets; Insufficient memory budget; Needed " + + std::to_string(size) + " but only had " + + std::to_string(memory_tracker_->get_memory_available()) + + " from budget " + + std::to_string(memory_tracker_->get_memory_budget())); + } + + // Get tile offsets + tile_offsets_[i].resize(tile_offsets_num); + deserializer.read(&tile_offsets_[i][0], size); + } + + parent_fragment_.loaded_metadata_.tile_offsets_.resize( + parent_fragment_.array_schema_->attribute_num() + 1, true); +} + +void OndemandMetadata::free_tile_offsets() { + for (uint64_t i = 0; i < tile_offsets_.size(); i++) { + std::lock_guard lock(tile_offsets_mtx_[i]); + if (memory_tracker_ != nullptr) { + memory_tracker_->release_memory( + tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); + } + tile_offsets_[i].clear(); + parent_fragment_.loaded_metadata_.tile_offsets_[i] = false; + } + + for (uint64_t i = 0; i < parent_fragment_.tile_var_offsets_.size(); i++) { + std::lock_guard 
lock(parent_fragment_.tile_var_offsets_mtx_[i]); + if (memory_tracker_ != nullptr) { + memory_tracker_->release_memory( + parent_fragment_.tile_var_offsets_[i].size() * sizeof(uint64_t), + MemoryType::TILE_OFFSETS); + } + parent_fragment_.tile_var_offsets_[i].clear(); + parent_fragment_.loaded_metadata_.tile_var_offsets_[i] = false; + } + + for (uint64_t i = 0; i < tile_offsets_.size(); i++) { + std::lock_guard lock(tile_offsets_mtx_[i]); + if (memory_tracker_ != nullptr) { + memory_tracker_->release_memory( + tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); + } + tile_offsets_[i].clear(); + parent_fragment_.loaded_metadata_.tile_offsets_[i] = false; + } + + for (uint64_t i = 0; i < parent_fragment_.tile_validity_offsets_.size(); + i++) { + std::lock_guard lock(parent_fragment_.mtx_); + if (memory_tracker_ != nullptr) { + memory_tracker_->release_memory( + parent_fragment_.tile_validity_offsets_[i].size() * sizeof(uint64_t), + MemoryType::TILE_OFFSETS); + } + parent_fragment_.tile_validity_offsets_[i].clear(); + parent_fragment_.loaded_metadata_.tile_validity_offsets_[i] = false; + } + + for (uint64_t i = 0; i < parent_fragment_.tile_var_sizes_.size(); i++) { + std::lock_guard lock(parent_fragment_.mtx_); + if (memory_tracker_ != nullptr) { + memory_tracker_->release_memory( + parent_fragment_.tile_var_sizes_[i].size() * sizeof(uint64_t), + MemoryType::TILE_OFFSETS); + } + parent_fragment_.tile_var_sizes_[i].clear(); + parent_fragment_.loaded_metadata_.tile_var_sizes_[i] = false; + } +} + +uint64_t OndemandMetadata::file_offset( + const std::string& name, uint64_t tile_idx) const { + auto it = parent_fragment_.idx_map_.find(name); + assert(it != parent_fragment_.idx_map_.end()); + auto idx = it->second; + if (!parent_fragment_.loaded_metadata_.tile_offsets_[idx]) { + throw std::logic_error( + "Trying to access tile offsets metadata that's not loaded"); + } + + return tile_offsets_[idx][tile_idx]; +} + +void 
OndemandMetadata::resize_tile_offsets_vectors(uint64_t size) { + tile_offsets_mtx_.resize(size); + tile_offsets_.resize(size); +} + +/* ********************************* */ +/* PRIVATE METHODS */ +/* ********************************* */ + +} // namespace tiledb::sm diff --git a/tiledb/sm/fragment/ondemand_metadata.h b/tiledb/sm/fragment/ondemand_metadata.h new file mode 100644 index 000000000000..f96486af167f --- /dev/null +++ b/tiledb/sm/fragment/ondemand_metadata.h @@ -0,0 +1,174 @@ +/** + * @file ondemand_metadata.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines class OndemandMetadata. 
+ */ + +#ifndef TILEDB_ONDEMAND_METADATA_H +#define TILEDB_ONDEMAND_METADATA_H + +#include +#include +#include +#include + +#include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" +#include "tiledb/sm/array_schema/array_schema.h" +#include "tiledb/sm/filesystem/uri.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/sm/rtree/rtree.h" +#include "tiledb/sm/storage_manager/context_resources.h" + +namespace tiledb { +namespace sm { + +class FragmentMetadata; + +/** Collection of lazily loaded fragment metadata */ +class OndemandMetadata { + public: + /* ********************************* */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ********************************* */ + + /** + * Constructor. + * + * @param resources A context resources instance. + * @param memory_tracker The memory tracker of the array this fragment + * metadata corresponds to. + */ + OndemandMetadata( + FragmentMetadata& parent, shared_ptr memory_tracker); + + /* ********************************* */ + /* API */ + /* ********************************* */ + + /** Returns the tile offsets. */ + inline const tdb::pmr::vector>& tile_offsets() + const { + return tile_offsets_; + } + + /** tile_offsets accessor */ + tdb::pmr::vector>& tile_offsets() { + return tile_offsets_; + } + + /** + * Retrieves the size of the tile when it is persisted (e.g. the size of the + * compressed tile on disk) for a given attribute or dimension and tile index. + * If the attribute/dimension is var-sized, this will return the persisted + * size of the offsets tile. + * + * @param name The input attribute/dimension. + * @param tile_idx The index of the tile in the metadata. + * @return Size. + */ + uint64_t persisted_tile_size( + const std::string& name, uint64_t tile_idx) const; + + /** + * Loads the tile offsets for the input attribute or dimension idx + * from storage. 
+ */ + void load_tile_offsets(const EncryptionKey& encryption_key, unsigned idx); + + /** + * Loads the tile offsets for the input attribute or dimension from the + * input buffer. + */ + void load_tile_offsets(unsigned idx, Deserializer& deserializer); + + /** + * Loads tile offsets for the attribute/dimension names. + * + * @param encryption_key The key the array got opened with. + * @param names The attribute/dimension names. + */ + void load_tile_offsets( + const EncryptionKey& encryption_key, std::vector& names); + + /** + * Loads the tile offsets for the input attribute from the input buffer. + * Applicable to versions 1 and 2 + */ + void load_tile_offsets(Deserializer& deserializer); + + /** Frees the memory associated with tile_offsets. */ + void free_tile_offsets(); + + /** + * Retrieves the starting offset of the input tile of the input attribute + * or dimension in the file. If the attribute/dimension is var-sized, it + * returns the starting offset of the offsets tile. + * + * @param name The input attribute/dimension. + * @param tile_idx The index of the tile in the metadata. + * @return The file offset to be retrieved. + */ + uint64_t file_offset(const std::string& name, uint64_t tile_idx) const; + + /** + * Resize tile offsets related vectors. + */ + void resize_tile_offsets_vectors(uint64_t size); + + private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + + FragmentMetadata& parent_fragment_; + + /** + * The memory tracker of the array this fragment metadata corresponds to. + */ + shared_ptr memory_tracker_; + + /** + * The tile offsets in their corresponding attribute files. Meaningful only + * when there is compression. + */ + tdb::pmr::vector> tile_offsets_; + + /** Mutex per tile offset loading. 
*/ + std::deque tile_offsets_mtx_; + + /* ********************************* */ + /* PRIVATE METHODS */ + /* ********************************* */ +}; + +} // namespace sm +} // namespace tiledb + +#endif // TILEDB_ONDEMAND_METADATA_H diff --git a/tiledb/sm/query/readers/filtered_data.h b/tiledb/sm/query/readers/filtered_data.h index 4b66a38dd3a2..377dbe2ce164 100644 --- a/tiledb/sm/query/readers/filtered_data.h +++ b/tiledb/sm/query/readers/filtered_data.h @@ -327,7 +327,8 @@ class FilteredData { */ inline void* fixed_filtered_data( const FragmentMetadata* fragment, const ResultTile* rt) { - auto offset{fragment->file_offset(name_, rt->tile_idx())}; + auto offset{ + fragment->ondemand_metadata().file_offset(name_, rt->tile_idx())}; ensure_data_block_current(TileType::FIXED, fragment, rt, offset); return current_data_block(TileType::FIXED)->data_at(offset); } @@ -443,7 +444,7 @@ class FilteredData { const uint64_t tile_idx) { switch (type) { case TileType::FIXED: - return fragment->file_offset(name_, tile_idx); + return fragment->ondemand_metadata().file_offset(name_, tile_idx); case TileType::VAR: return fragment->file_var_offset(name_, tile_idx); case TileType::NULLABLE: @@ -467,7 +468,8 @@ class FilteredData { const uint64_t tile_idx) { switch (type) { case TileType::FIXED: - return fragment->persisted_tile_size(name_, tile_idx); + return fragment->ondemand_metadata().persisted_tile_size( + name_, tile_idx); case TileType::VAR: return fragment->persisted_tile_var_size(name_, tile_idx); case TileType::NULLABLE: diff --git a/tiledb/sm/query/readers/reader_base.cc b/tiledb/sm/query/readers/reader_base.cc index e380de81c308..e016f087fceb 100644 --- a/tiledb/sm/query/readers/reader_base.cc +++ b/tiledb/sm/query/readers/reader_base.cc @@ -450,7 +450,8 @@ void ReaderBase::load_tile_offsets( filtered_names.emplace_back(name); } - fragment->load_tile_offsets(*encryption_key, filtered_names); + fragment->ondemand_metadata().load_tile_offsets( + *encryption_key, 
filtered_names); return Status::Ok(); })); } diff --git a/tiledb/sm/query/readers/result_tile.h b/tiledb/sm/query/readers/result_tile.h index e303562969ed..949bcbaa07e5 100644 --- a/tiledb/sm/query/readers/result_tile.h +++ b/tiledb/sm/query/readers/result_tile.h @@ -95,7 +95,8 @@ class ResultTile { const bool validity_only, const uint64_t tile_idx) : tile_size_(validity_only ? 0 : fragment->tile_size(name, tile_idx)) - , tile_persisted_size_(fragment->persisted_tile_size(name, tile_idx)) + , tile_persisted_size_( + fragment->ondemand_metadata().persisted_tile_size(name, tile_idx)) , tile_var_size_( var_size && !validity_only ? std::optional(fragment->tile_var_size(name, tile_idx)) : diff --git a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc index ec9dc6d35acf..4e18266f8c33 100644 --- a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc +++ b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc @@ -292,7 +292,7 @@ void SparseUnorderedWithDupsReader::load_tile_offsets_data() { for (unsigned f = tile_offsets_min_frag_idx_; f < tile_offsets_max_frag_idx_; f++) { - fragment_metadata_[f]->free_tile_offsets(); + fragment_metadata_[f]->ondemand_metadata().free_tile_offsets(); } tile_offsets_min_frag_idx_ = tile_offsets_max_frag_idx_; diff --git a/tiledb/sm/serialization/fragment_metadata.cc b/tiledb/sm/serialization/fragment_metadata.cc index 999c860db8fb..6ef7b23a6d28 100644 --- a/tiledb/sm/serialization/fragment_metadata.cc +++ b/tiledb/sm/serialization/fragment_metadata.cc @@ -172,7 +172,8 @@ Status fragment_metadata_from_capnp( // refactored query, but readers on the server side require these vectors to // have the first dimension properly allocated when loading their data on // demand. 
- frag_meta->resize_tile_offsets_vectors(num_dims_and_attrs); + frag_meta->ondemand_metadata().resize_tile_offsets_vectors( + num_dims_and_attrs); loaded_metadata.tile_offsets_.resize(num_dims_and_attrs, false); // There is a difference in the metadata loaded for versions >= 2 @@ -182,7 +183,7 @@ Status fragment_metadata_from_capnp( auto tileoffsets_reader = frag_meta_reader.getTileOffsets(); uint64_t i = 0; for (const auto& t : tileoffsets_reader) { - auto& last = frag_meta->tile_offsets()[i]; + auto& last = frag_meta->ondemand_metadata().tile_offsets()[i]; last.reserve(t.size()); for (const auto& v : t) { last.emplace_back(v); @@ -474,7 +475,7 @@ void generic_tile_offsets_to_capnp( void fragment_meta_sizes_offsets_to_capnp( const FragmentMetadata& frag_meta, capnp::FragmentMetadata::Builder* frag_meta_builder) { - auto& tile_offsets = frag_meta.tile_offsets(); + auto& tile_offsets = frag_meta.ondemand_metadata().tile_offsets(); if (!tile_offsets.empty()) { auto builder = frag_meta_builder->initTileOffsets(tile_offsets.size()); for (uint64_t i = 0; i < tile_offsets.size(); ++i) { diff --git a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index e181dd6a9948..b79a4c1eecff 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -142,10 +142,11 @@ void InfoCommand::print_tile_sizes() const { uint64_t tile_num = f->tile_num(); std::vector names; names.push_back(name); - f->load_tile_offsets(enc_key, names); + f->ondemand_metadata().load_tile_offsets(enc_key, names); f->load_tile_var_sizes(enc_key, name); for (uint64_t tile_idx = 0; tile_idx < tile_num; tile_idx++) { - persisted_tile_size += f->persisted_tile_size(name, tile_idx); + persisted_tile_size += + f->ondemand_metadata().persisted_tile_size(name, tile_idx); in_memory_tile_size += f->tile_size(name, tile_idx); num_tiles++; if (var_size) {