diff --git a/tiledb/common/random/CMakeLists.txt b/tiledb/common/random/CMakeLists.txt index edff498f6394..4717ba38f59f 100644 --- a/tiledb/common/random/CMakeLists.txt +++ b/tiledb/common/random/CMakeLists.txt @@ -3,7 +3,7 @@ # # The MIT License # -# Copyright (c) 2023 TileDB, Inc. +# Copyright (c) 2023-2024 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -37,6 +37,7 @@ gather_sources(${SOURCES}) commence(object_library seedable_global_PRNG) this_target_sources(${SOURCES}) this_target_link_libraries(export) + this_target_object_libraries(baseline time) conclude(object_library) add_test_subdirectory() diff --git a/tiledb/common/random/random_label.cc b/tiledb/common/random/random_label.cc index cc962b94d0ab..57654e9d8deb 100644 --- a/tiledb/common/random/random_label.cc +++ b/tiledb/common/random/random_label.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2023-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -31,33 +31,26 @@ */ #include "tiledb/common/random/random_label.h" -#include "tiledb/common/random/prng.h" - -#include -#include namespace tiledb::common { -/** - * Legacy code provides randomness using UUIDs, which are always 128 bits, - * represented as a 32-digit hexadecimal value. - * - * To ensure backward compatibility, this function formats the PRNG-generated - * values to be precisely a 32-digit hexadecimal value. Each value is padded - * with 0s such that it makes up one 16-digit half of the full 32-digit number. 
- */
-std::string random_label() {
-  PRNG& prng = PRNG::get();
-  std::stringstream ss;
+/* ********************************* */
+/*     CONSTRUCTORS & DESTRUCTORS    */
+/* ********************************* */
+RandomLabelGenerator::RandomLabelGenerator()
+    : prev_time_(tiledb::sm::utils::time::timestamp_now_ms()) {
+}  // Only prev_time_ is set here; counter_ is not in the initializer list.
 
-  // Generate and format a 128-bit, 32-digit hexadecimal random number
-  auto rand1 = prng();
-  ss << std::hex << std::setw(16) << std::setfill('0') << rand1;
-  auto rand2 = prng();
-  ss << std::hex << std::setw(16) << std::setfill('0') << rand2;
+/* ********************************* */
+/*                API                */
+/* ********************************* */
+std::string RandomLabelGenerator::generate_random_label() {
+  static RandomLabelGenerator generator;  // One shared instance, built on first call.
+  return generator.generate();
+}
 
-  // Return label string
-  return ss.str();
+std::string random_label() {  // Thin wrapper over the shared generator.
+  return RandomLabelGenerator::generate_random_label();
 }
 
 }  // namespace tiledb::common
diff --git a/tiledb/common/random/random_label.h b/tiledb/common/random/random_label.h
index 600a5831039b..b637ef4db4cd 100644
--- a/tiledb/common/random/random_label.h
+++ b/tiledb/common/random/random_label.h
@@ -5,7 +5,7 @@
  *
  * The MIT License
  *
- * @copyright Copyright (c) 2023 TileDB, Inc.
+ * @copyright Copyright (c) 2023-2024 TileDB, Inc.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,10 +33,91 @@ #ifndef TILEDB_HELPERS_H #define TILEDB_HELPERS_H +#include +#include +#include #include +#include "tiledb/common/exception/exception.h" +#include "tiledb/common/random/prng.h" +#include "tiledb/sm/misc/tdb_time.h" + namespace tiledb::common { +class RandomLabelException : public StatusException { + public: + explicit RandomLabelException(const std::string& message) + : StatusException("RandomLabel", message) { + } +}; + +class RandomLabelGenerator { + public: + /* ********************************* */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ********************************* */ + DISABLE_COPY_AND_COPY_ASSIGN(RandomLabelGenerator); + DISABLE_MOVE_AND_MOVE_ASSIGN(RandomLabelGenerator); + + /** Default destructor. */ + ~RandomLabelGenerator() = default; + + protected: + /** Protected constructor, abstracted by public-facing accessor. */ + RandomLabelGenerator(); + + /* ********************************* */ + /* API */ + /* ********************************* */ + /** Generate a random label. */ + std::string generate() { + std::lock_guard lock(mtx_); + PRNG& prng = PRNG::get(); + auto now = tiledb::sm::utils::time::timestamp_now_ms(); + + // If no label has been generated this millisecond, generate a new one. + if (now != prev_time_) { + prev_time_ = now; + counter_ = static_cast(prng()); + // Clear the top bit of the counter such that a full 2 billion values + // could be generated within a single millisecond. 
+ counter_ &= 0x7FFFFFFF; + } else { + counter_ += 1; + if (counter_ == 0) { + throw RandomLabelException("Maximum generation frequency exceeded."); + } + } + + // Generate and format a 128-bit, 32-digit hexadecimal random number + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << counter_; + ss << std::hex << std::setw(8) << std::setfill('0') + << static_cast(prng()); + ss << std::hex << std::setw(16) << std::setfill('0') << prng(); + return ss.str(); + } + + public: + /** Generate a random label. */ + static std::string generate_random_label(); + + private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + static RandomLabelGenerator singleton_; + + /** Mutex which protects against simultaneous random label generation. */ + std::mutex mtx_; + + /** The time (in milliseconds) of the last label creation. */ + uint64_t prev_time_; + + /** The submillsecond counter portion of the random label. */ + uint32_t counter_; +}; + /** * Returns a PRNG-generated label as a 32-digit hexadecimal random number. * (Ex. f258d22d4db9139204eef2b4b5d860cc). diff --git a/tiledb/common/random/test/CMakeLists.txt b/tiledb/common/random/test/CMakeLists.txt index 2a8754ac7dcd..508aab5f121b 100644 --- a/tiledb/common/random/test/CMakeLists.txt +++ b/tiledb/common/random/test/CMakeLists.txt @@ -3,7 +3,7 @@ # # The MIT License # -# Copyright (c) 2023 TileDB, Inc. +# Copyright (c) 2023-2024 TileDB, Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,6 +27,6 @@ include(unit_test) commence(unit_test seedable_global_PRNG) - this_target_sources(unit_seedable_global_PRNG.cc) + this_target_sources(unit_random_label_generator.cc unit_seedable_global_PRNG.cc) this_target_object_libraries(seedable_global_PRNG) conclude(unit_test) diff --git a/tiledb/common/random/test/unit_random_label_generator.cc b/tiledb/common/random/test/unit_random_label_generator.cc new file mode 100644 index 000000000000..529608a42ed0 --- /dev/null +++ b/tiledb/common/random/test/unit_random_label_generator.cc @@ -0,0 +1,138 @@ +/** + * @file tiledb/common/random/test/unit_random_label_generator.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ * Tests for the random label generator.
+ */
+
+#include <test/support/tdb_catch.h>  // NOTE(review): header name lost in extraction; presumed, confirm.
+#include "../random_label.h"
+
+using namespace tiledb::common;
+using namespace tiledb::sm;
+
+size_t generate_labels(std::vector<std::string>& labels) {
+  size_t labels_size = labels.size();
+  auto now = utils::time::timestamp_now_ms();
+  size_t idx = 0;
+  while ((utils::time::timestamp_now_ms()) < now + 100 && idx < labels_size) {
+    labels[idx++] = random_label();
+  }
+
+  return idx;
+}
+
+void validate_labels(std::vector<std::string>& labels, size_t num_labels) {
+  // Given the label randomness and the fact that we're racing the processor,
+  // the best we can do here (for now) is assert there are >10 ordered groups.
+  // Groups are runs of labels sharing their first 4 hex characters.
+  uint64_t num_groups = 0;
+  uint64_t this_group = 0;
+  for (size_t i = 1; i < num_labels; i++) {
+    bool match = true;
+    for (size_t j = 0; j < 4; j++) {
+      if (labels[i - 1][j] != labels[i][j]) {
+        match = false;
+        break;
+      }
+    }
+    if (!match) {
+      if (this_group > 10) {
+        num_groups += 1;
+      }
+      this_group = 0;
+      continue;
+    }
+
+    // We share a prefix so assert that they're ordered.
+    REQUIRE(labels[i] > labels[i - 1]);
+    this_group += 1;
+  }
+
+  REQUIRE(num_groups > 10);
+}
+
+TEST_CASE(
+    "RandomLabelGenerator: serial generation",
+    "[RandomLabelGenerator][serial]") {
+  // Generate a random label to validate initialization.
+  auto label = random_label();
+  REQUIRE(label.size() == 32);
+
+  // Test one million strings. Let's assume the buffer overflow check works.
+  std::vector<std::string> labels(1000000);
+  auto num_labels = generate_labels(labels);
+  validate_labels(labels, num_labels);
+}
+
+TEST_CASE(
+    "RandomLabelGenerator: parallel generation",
+    "[RandomLabelGenerator][parallel]") {
+  const unsigned nthreads = 20;
+  std::vector<std::thread> threads;
+  std::vector<std::vector<std::string>> labels(nthreads);
+  size_t num_labels[nthreads];
+
+  // Pre-allocate our buffers so we're getting as much contention as possible
+  for (size_t i = 0; i < nthreads; i++) {
+    labels[i].resize(1000000);
+  }
+
+  // Generate labels simultaneously in multiple threads.
+  for (size_t i = 0; i < nthreads; i++) {
+    auto num_ptr = &num_labels[i];
+    auto vec_ptr = &labels[i];
+    threads.emplace_back([num_ptr, vec_ptr]() {
+      auto num = generate_labels(*vec_ptr);
+      *num_ptr = num;
+    });
+  }
+
+  // Wait for all of our threads to finish.
+  for (auto& t : threads) {
+    t.join();
+  }
+
+  // Check that we've generated the correct number of random labels.
+  std::unordered_set<std::string> label_set;
+  size_t total_labels = 0;
+  for (size_t i = 0; i < nthreads; i++) {
+    total_labels += num_labels[i];
+    for (size_t j = 0; j < num_labels[i]; j++) {
+      label_set.insert(labels[i][j]);
+    }
+  }
+  REQUIRE(label_set.size() == total_labels);
+
+  // Sort and validate the parallel threads as if they were serially generated.
+  std::vector<std::string> all_labels(total_labels);
+  size_t idx = 0;
+  for (const auto& label : label_set) {
+    all_labels[idx++] = label;
+  }
+  std::sort(all_labels.begin(), all_labels.end());
+  validate_labels(all_labels, total_labels);
+}