From 8edff9412b8389dee91e30dd6dae12bd4bc9734e Mon Sep 17 00:00:00 2001 From: Luc Rancourt Date: Mon, 25 Mar 2024 23:42:34 +0100 Subject: [PATCH] Fix timestamp usage with random label. Random label uses the current time to know if a new label needs to use an incrementing counter so that labels generated in the same millisecond sort properly when reading back. Since the code at the higher level used a timestamp and the random label used another, it was possible to end up with timestamped names having the same millisecond, but not sorting properly. The fix is to have the random label code return the timestamp it used to the caller so that the caller can use it to generate timestamped names. --- TYPE: NO_HISTORY DESC: Fix timestamp usage with random label. --- tiledb/common/random/random_label.cc | 21 ++++-------- tiledb/common/random/random_label.h | 39 +++++++++++++++++++++-- tiledb/sm/array/array.cc | 8 ++--- tiledb/sm/array/array.h | 8 ----- tiledb/sm/metadata/metadata.cc | 34 +++++++++----------- tiledb/sm/metadata/metadata.h | 12 ++----- tiledb/storage_format/uri/generate_uri.cc | 9 ++++-- 7 files changed, 69 insertions(+), 62 deletions(-) diff --git a/tiledb/common/random/random_label.cc b/tiledb/common/random/random_label.cc index ba04ae92bfc..d985d4fd621 100644 --- a/tiledb/common/random/random_label.cc +++ b/tiledb/common/random/random_label.cc @@ -44,7 +44,7 @@ RandomLabelGenerator::RandomLabelGenerator() /* ********************************* */ /* API */ /* ********************************* */ -std::string RandomLabelGenerator::generate() { +RandomLabelWithTimestamp RandomLabelGenerator::generate() { PRNG& prng = PRNG::get(); std::lock_guard lock(mtx_); auto now = tiledb::sm::utils::time::timestamp_now_ms(); @@ -69,26 +69,19 @@ std::string RandomLabelGenerator::generate() { ss << std::hex << std::setw(8) << std::setfill('0') << static_cast(prng()); ss << std::hex << std::setw(16) << std::setfill('0') << prng(); - return ss.str(); + return {ss.str(), now}; } -std::string RandomLabelGenerator::generate_random_label() { +RandomLabelWithTimestamp RandomLabelGenerator::generate_random_label() { static RandomLabelGenerator generator; return generator.generate(); } -/** - * Wrapper function for `generate_random_label`, which returns a PRNG-generated - * label as a 32-digit hexadecimal random number. - * (Ex. f258d22d4db9139204eef2b4b5d860cc). - * - * @pre If multiple labels are generated within the same millisecond, they will - * be sorted using a counter on the most significant 4 bytes. - * @note Labels may be 0-padded to ensure exactly a 128-bit, 32-digit length. - * - * @return A random label. - */ std::string random_label() { + return RandomLabelGenerator::generate_random_label().random_label_; +} + +RandomLabelWithTimestamp random_label_with_timestamp() { return RandomLabelGenerator::generate_random_label(); } diff --git a/tiledb/common/random/random_label.h b/tiledb/common/random/random_label.h index 6675605f435..3e74518f71c 100644 --- a/tiledb/common/random/random_label.h +++ b/tiledb/common/random/random_label.h @@ -51,6 +51,26 @@ class RandomLabelException : public StatusException { } }; +/** Simple struct to return a label with timestamp. */ +struct RandomLabelWithTimestamp { + /** + * Construct a new random label with timestamp object + * + * @param random_label Random label. + * @param timestamp Timestamp the random label was created. + */ + RandomLabelWithTimestamp(std::string random_label, uint64_t timestamp) + : random_label_(random_label) + , timestamp_(timestamp) { + } + + /** Random label. */ + std::string random_label_; + + /** Timestamp the label was created. */ + uint64_t timestamp_; +}; + /** * Generates a pseudeo-random label, formatted as a 32-digit hexadecimal number. * (Ex. f258d22d4db9139204eef2b4b5d860cc). @@ -77,12 +97,12 @@ class RandomLabelGenerator { /* ********************************* */ /* API */ /* ********************************* */ - /** Generate a random label. */ - std::string generate(); + /** Generate a random label with a timestamp. */ + RandomLabelWithTimestamp generate(); public: /** Generate a random label. */ - static std::string generate_random_label(); + static RandomLabelWithTimestamp generate_random_label(); private: /* ********************************* */ @@ -112,6 +132,19 @@ class RandomLabelGenerator { */ std::string random_label(); +/** + * Wrapper function for `generate_random_label`, which returns a PRNG-generated + * label as a 32-digit hexadecimal random number. + * (Ex. f258d22d4db9139204eef2b4b5d860cc). + * + * @pre If multiple labels are generated within the same millisecond, they will + * be sorted using a counter on the most significant 4 bytes. + * @note Labels may be 0-padded to ensure exactly a 128-bit, 32-digit length. + * + * @return A random label with timestamp. + */ +RandomLabelWithTimestamp random_label_with_timestamp(); + } // namespace tiledb::common #endif // TILEDB_HELPERS_H diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index cf8e9bea862..395be3468b8 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -380,7 +380,7 @@ Status Array::open( set_array_schemas_all(std::move(array_schemas.value())); // Set the timestamp - opened_array_->metadata().reset(timestamp_for_new_component()); + opened_array_->metadata().reset(timestamp_end_opened_at()); } else if ( query_type == QueryType::DELETE || query_type == QueryType::UPDATE) { { @@ -421,7 +421,7 @@ Status Array::open( } // Updates the timestamp to use for metadata. - opened_array_->metadata().reset(timestamp_for_new_component()); + opened_array_->metadata().reset(timestamp_end_opened_at()); } else { throw ArrayException("Cannot open array; Unsupported query type."); } @@ -1137,10 +1137,6 @@ bool Array::use_refactored_array_open() const { return refactored_array_open || use_refactored_query_submit(); } -uint64_t Array::timestamp_for_new_component() const { - return new_component_timestamp_.value_or(utils::time::timestamp_now_ms()); -} - bool Array::use_refactored_query_submit() const { auto found = false; auto refactored_query_submit = false; diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 380deef7e9c..3b7286d709c 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -585,14 +585,6 @@ class Array { new_component_timestamp_.value_or(0); } - /** - * Returns the timestamp to use when writing components (fragment, - * metadata, etc.) - * - * If set to use the lastest time, this will get the time when called. - */ - uint64_t timestamp_for_new_component() const; - /** Directly set the timestamp start value. */ inline void set_timestamp_start(uint64_t timestamp_start) { array_dir_timestamp_start_ = timestamp_start; diff --git a/tiledb/sm/metadata/metadata.cc b/tiledb/sm/metadata/metadata.cc index d5b585147e6..b93c3a50ca8 100644 --- a/tiledb/sm/metadata/metadata.cc +++ b/tiledb/sm/metadata/metadata.cc @@ -60,12 +60,12 @@ Metadata::Metadata(shared_ptr memory_tracker) : memory_tracker_(memory_tracker) , metadata_map_(memory_tracker_->get_resource(MemoryType::METADATA)) , metadata_index_(memory_tracker_->get_resource(MemoryType::METADATA)) - , timestamp_range_([]() -> std::pair { + , loaded_metadata_uris_(memory_tracker_->get_resource(MemoryType::METADATA)) + , timestamped_name_([]() -> std::string { auto t = utils::time::timestamp_now_ms(); - return std::make_pair(t, t); - }()) - , loaded_metadata_uris_( - memory_tracker_->get_resource(MemoryType::METADATA)) { + return tiledb::storage_format::generate_timestamped_name( + t, t, std::nullopt); + }()) { build_metadata_index(); } @@ -79,7 +79,7 @@ Metadata& Metadata::operator=(Metadata& other) { metadata_map_.emplace(k, v); } - timestamp_range_ = other.timestamp_range_; + timestamped_name_ = other.timestamped_name_; for (auto& uri : other.loaded_metadata_uris_) { loaded_metadata_uris_.emplace_back(uri); @@ -106,7 +106,6 @@ void Metadata::clear() { metadata_map_.clear(); metadata_index_.clear(); loaded_metadata_uris_.clear(); - timestamp_range_ = std::make_pair(0, 0); uri_ = URI(); } @@ -118,10 +117,8 @@ URI Metadata::get_uri(const URI& array_uri) { } void Metadata::generate_uri(const URI& array_uri) { - auto ts_name = tiledb::storage_format::generate_timestamped_name( - timestamp_range_.first, timestamp_range_.second, std::nullopt); uri_ = array_uri.join_path(constants::array_metadata_dir_name) - .join_path(ts_name); + .join_path(timestamped_name_); } std::map Metadata::deserialize( @@ -189,10 +186,6 @@ void Metadata::serialize(Serializer& serializer) const { } } -const std::pair& Metadata::timestamp_range() const { - return timestamp_range_; -} - void Metadata::del(const char* key) { assert(key != nullptr); @@ -317,8 +310,10 @@ void Metadata::set_loaded_metadata_uris( loaded_metadata_uris_.push_back(uri.uri_); } - timestamp_range_.first = loaded_metadata_uris.front().timestamp_range_.first; - timestamp_range_.second = loaded_metadata_uris.back().timestamp_range_.second; + timestamped_name_ = tiledb::storage_format::generate_timestamped_name( + loaded_metadata_uris.front().timestamp_range_.first, + loaded_metadata_uris.back().timestamp_range_.second, + std::nullopt); } const tdb::pmr::vector& Metadata::loaded_metadata_uris() const { @@ -327,14 +322,15 @@ const tdb::pmr::vector& Metadata::loaded_metadata_uris() const { void Metadata::reset(uint64_t timestamp) { clear(); - timestamp = (timestamp != 0) ? timestamp : utils::time::timestamp_now_ms(); - timestamp_range_ = std::make_pair(timestamp, timestamp); + timestamped_name_ = tiledb::storage_format::generate_timestamped_name( + timestamp, timestamp, std::nullopt); } void Metadata::reset( const uint64_t timestamp_start, const uint64_t timestamp_end) { clear(); - timestamp_range_ = std::make_pair(timestamp_start, timestamp_end); + timestamped_name_ = tiledb::storage_format::generate_timestamped_name( + timestamp_start, timestamp_end, std::nullopt); } Metadata::iterator Metadata::begin() const { diff --git a/tiledb/sm/metadata/metadata.h b/tiledb/sm/metadata/metadata.h index 2e45614eaea..ec883dfdf51 100644 --- a/tiledb/sm/metadata/metadata.h +++ b/tiledb/sm/metadata/metadata.h @@ -136,9 +136,6 @@ class Metadata { /** Serializes all key-value metadata items into the input buffer. */ void serialize(Serializer& serializer) const; - /** Returns the timestamp range. */ - const std::pair& timestamp_range() const; - /** * Deletes a metadata item. * @@ -257,12 +254,6 @@ class Metadata { /** Mutex for thread-safety. */ mutable std::mutex mtx_; - /** - * The timestamp range covered by the metadata that was read or written. - * This is used to determine the metadata file name. - */ - std::pair timestamp_range_; - /** * The URIs of the metadata files that have been loaded to this object. * This is needed to know which files to delete upon consolidation. @@ -272,6 +263,9 @@ class Metadata { /** The URI of the array metadata file. */ URI uri_; + /** Timestamped name. */ + std::string timestamped_name_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ diff --git a/tiledb/storage_format/uri/generate_uri.cc b/tiledb/storage_format/uri/generate_uri.cc index d823936c13d..03c8ad2a7da 100644 --- a/tiledb/storage_format/uri/generate_uri.cc +++ b/tiledb/storage_format/uri/generate_uri.cc @@ -48,9 +48,14 @@ std::string generate_timestamped_name( "start timestamp cannot be after end timestamp."); } + auto lbl = random_label_with_timestamp(); + if (timestamp_start == 0 && timestamp_end == 0) { + timestamp_start = timestamp_end = lbl.timestamp_; + } + std::stringstream ss; ss << "/__" << timestamp_start << "_" << timestamp_end << "_" - << random_label(); + << lbl.random_label_; if (version.has_value()) { ss << "_" << version.value(); @@ -61,8 +66,6 @@ std::string generate_timestamped_name( std::string generate_timestamped_name( uint64_t timestamp, format_version_t format_version) { - timestamp = - (timestamp != 0) ? timestamp : sm::utils::time::timestamp_now_ms(); return generate_timestamped_name(timestamp, timestamp, format_version); }