From 0567f03c78a8a8adb4d59be8c31a71f544142f64 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 3 Oct 2023 13:38:13 -0500 Subject: [PATCH] Add memory tracking to Enumeration loading This adds a new MemoryTracker type for tracking Enumeration data usage. --- test/src/unit-enumerations.cc | 43 +++++++++++++++++++++++++++--- tiledb/common/memory_tracker.h | 10 +++++-- tiledb/sm/array/array.cc | 2 +- tiledb/sm/array/array_directory.cc | 18 ++++++++++--- tiledb/sm/array/array_directory.h | 7 +++-- 5 files changed, 69 insertions(+), 11 deletions(-) diff --git a/test/src/unit-enumerations.cc b/test/src/unit-enumerations.cc index 4c0e1c0c07e..8b86c1c7abd 100644 --- a/test/src/unit-enumerations.cc +++ b/test/src/unit-enumerations.cc @@ -696,7 +696,9 @@ TEST_CASE_METHOD( auto enmr_path = schema->get_enumeration_path_name(enmr_name.value()); - auto loaded = ad->load_enumerations_from_paths({enmr_path}, enc_key_); + MemoryTracker tracker; + auto loaded = + ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker); REQUIRE(loaded.size() == 1); auto enmr = loaded[0]; @@ -718,11 +720,46 @@ TEST_CASE_METHOD( auto schema = get_array_schema_latest(); auto ad = get_array_directory(); + MemoryTracker tracker; // Check that this function throws an exception when attempting to load // an unknown enumeration - REQUIRE_THROWS( - ad->load_enumerations_from_paths({"not_an_enumeration"}, enc_key_)); + auto posix_matcher = + Catch::Matchers::ContainsSubstring("No such file or directory"); + auto windows_matcher = Catch::Matchers::ContainsSubstring( + "The system cannot find the file specified."); + REQUIRE_THROWS_WITH( + ad->load_enumerations_from_paths({"unknown_enmr"}, enc_key_, tracker), + posix_matcher || windows_matcher); +} + +TEST_CASE_METHOD( + EnumerationFx, + "ArrayDirectory - Load Enumeration Memory Limit Exceeded", + "[enumeration][array-directory][error]") { + create_array(); + + auto schema = get_array_schema_latest(); + auto ad = get_array_directory(); + + auto enmr_name = schema->attribute("attr1")->get_enumeration_name(); + auto enmr_path = schema->get_enumeration_path_name(enmr_name.value()); + + MemoryTracker tracker; + tracker.set_budget(1); + + // Check that this function throws an exception when attempting to load + // an enumeration that exceeds the memory budget. + auto matcher = Catch::Matchers::ContainsSubstring( + "Error loading enumeration; Insufficient memory budget;"); + REQUIRE_THROWS_WITH( + ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker), + matcher); + + // Check that the fix is to increase the memory budget. + tracker.set_budget(std::numeric_limits::max()); + REQUIRE_NOTHROW( + ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker)); } /* ********************************* */ diff --git a/tiledb/common/memory_tracker.h b/tiledb/common/memory_tracker.h index 7f5c3e10db1..02d374474b9 100644 --- a/tiledb/common/memory_tracker.h +++ b/tiledb/common/memory_tracker.h @@ -41,7 +41,13 @@ namespace sm { class MemoryTracker { public: - enum class MemoryType { RTREE, FOOTER, TILE_OFFSETS, MIN_MAX_SUM_NULL_COUNT }; + enum class MemoryType { + RTREE, + FOOTER, + TILE_OFFSETS, + MIN_MAX_SUM_NULL_COUNT, + ENUMERATION + }; /** Constructor. */ MemoryTracker() { @@ -150,4 +156,4 @@ class MemoryTracker { } // namespace sm } // namespace tiledb -#endif // TILEDB_OPEN_ARRAY_MEMORY_TRACKER_H \ No newline at end of file +#endif // TILEDB_OPEN_ARRAY_MEMORY_TRACKER_H diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 5d1bacb8c17..069392187b4 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -640,7 +640,7 @@ std::vector> Array::get_enumerations( // Load the enumerations from storage loaded = array_dir_.load_enumerations_from_paths( - paths_to_load, get_encryption_key()); + paths_to_load, get_encryption_key(), memory_tracker_); } // Store the loaded enumerations in the schema diff --git a/tiledb/sm/array/array_directory.cc b/tiledb/sm/array/array_directory.cc index f6fdf9d8afa..8a0a52c964d 100644 --- a/tiledb/sm/array/array_directory.cc +++ b/tiledb/sm/array/array_directory.cc @@ -191,7 +191,8 @@ ArrayDirectory::load_all_array_schemas( std::vector> ArrayDirectory::load_enumerations_from_paths( const std::vector& enumeration_paths, - const EncryptionKey& encryption_key) const { + const EncryptionKey& encryption_key, + MemoryTracker& memory_tracker) const { // This should never be called with an empty list of enumeration paths, but // there's no reason to not check an early return case here given that code // changes. @@ -202,7 +203,8 @@ ArrayDirectory::load_enumerations_from_paths( std::vector> ret(enumeration_paths.size()); auto& tp = resources_.get().io_tp(); throw_if_not_ok(parallel_for(&tp, 0, enumeration_paths.size(), [&](size_t i) { - ret[i] = load_enumeration(enumeration_paths[i], encryption_key); + ret[i] = + load_enumeration(enumeration_paths[i], encryption_key, memory_tracker); return Status::Ok(); })); return ret; @@ -1313,7 +1315,8 @@ bool ArrayDirectory::consolidation_with_timestamps_supported( shared_ptr ArrayDirectory::load_enumeration( const std::string& enumeration_path, - const EncryptionKey& encryption_key) const { + const EncryptionKey& encryption_key, + MemoryTracker& memory_tracker) const { auto timer_se = resources_.get().stats().start_timer("sm_load_enumeration"); auto enmr_uri = uri_.join_path(constants::array_schema_dir_name) @@ -1323,6 +1326,15 @@ shared_ptr ArrayDirectory::load_enumeration( auto&& tile = GenericTileIO::load(resources_, enmr_uri, 0, encryption_key); resources_.get().stats().add_counter("read_enumeration_size", tile.size()); + if (!memory_tracker.take_memory( + tile.size(), MemoryTracker::MemoryType::ENUMERATION)) { + throw ArrayDirectoryException( + "Error loading enumeration; Insufficient memory budget; Needed " + + std::to_string(tile.size()) + " but only had " + + std::to_string(memory_tracker.get_memory_available()) + + " from budget " + std::to_string(memory_tracker.get_memory_budget())); + } + Deserializer deserializer(tile.data(), tile.size()); return Enumeration::deserialize(deserializer); } diff --git a/tiledb/sm/array/array_directory.h b/tiledb/sm/array/array_directory.h index b8f05468dc0..19c69cfe2ce 100644 --- a/tiledb/sm/array/array_directory.h +++ b/tiledb/sm/array/array_directory.h @@ -33,6 +33,7 @@ #ifndef TILEDB_ARRAY_DIRECTORY_H #define TILEDB_ARRAY_DIRECTORY_H +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/common/thread_pool.h" #include "tiledb/sm/array_schema/array_schema.h" @@ -394,7 +395,8 @@ class ArrayDirectory { */ std::vector> load_enumerations_from_paths( const std::vector& enumeration_paths, - const EncryptionKey& encryption_key) const; + const EncryptionKey& encryption_key, + MemoryTracker& memory_tracker) const; /** Returns the array URI. */ const URI& uri() const; @@ -826,7 +828,8 @@ class ArrayDirectory { */ shared_ptr load_enumeration( const std::string& enumeration_path, - const EncryptionKey& encryption_key) const; + const EncryptionKey& encryption_key, + MemoryTracker& memory_tracker) const; }; } // namespace sm