diff --git a/tiledb/common/random/prng.cc b/tiledb/common/random/prng.cc index 842439b49b4..1dc72c243db 100644 --- a/tiledb/common/random/prng.cc +++ b/tiledb/common/random/prng.cc @@ -33,16 +33,77 @@ #include "tiledb/common/random/prng.h" namespace tiledb::common { +/** + * 64-bit mersenne twister engine for random number generation. + * + * This definition is duplicated to avoid having it defined as `public` in + * `class PRNG`. + */ +using prng_type = std::mt19937_64; + +/** + * Implementation of the random seed. + * + * This is a class template in order to use `if constexpr`. + * + * @tparam return_size_type The type of the seed to be returned + */ +template +return_size_type random_seed() { + static constexpr size_t rng_size = sizeof(std::random_device::result_type); + static constexpr size_t ret_size = sizeof(return_size_type); + std::random_device rng{}; + /* + * We will need 64 bits to fully seed the PRNG (`ret_size`). We support cases + * where the result size of the RNG is 64 or 32 bits (`rng_size`). + */ + if constexpr (ret_size == rng_size) { + return rng(); + } else if constexpr (ret_size == 2*rng_size) { + return (rng() << rng_size) + rng(); + } else { + throw std::runtime_error("Unsupported combination of RNG sizes"); + } +} + +/** + * The PRNG used within the random constructor. + */ +prng_type prng_random() { + return prng_type{random_seed()}; // RVO +} + +/** + * The PRNG used within the default constructor. + */ +prng_type prng_default() { + /* + * Retrieve optional seed, which may or may not have been set explicitly. + */ + auto seed{Seeder::get().seed()}; + /* + * Use the seed if it has been set. Otherwise use a random seed. + */ + if (seed.has_value()) { + return prng_type{seed.value()}; // RVO + } else { + return prng_random(); // RVO + } +} /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ PRNG::PRNG() - : prng_(prng_initial()) + : prng_(prng_default()) , mtx_{} { } +PRNG::PRNG(RandomSeedT) + : prng_(prng_random()) + , mtx_{} {} + /* ********************************* */ /* API */ /* ********************************* */ @@ -61,16 +122,4 @@ uint64_t PRNG::operator()() { /* PRIVATE METHODS */ /* ********************************* */ -std::mt19937_64 PRNG::prng_initial() { - // Retrieve optional, potentially default-constructed seed. - auto seed{Seeder::get().seed()}; - - // If the seed has been set, set it on the RNG engine. - if (seed.has_value()) { - return std::mt19937_64{seed.value()}; // RVO - } else { - return {}; // RVO - } -} - } // namespace tiledb::common diff --git a/tiledb/common/random/prng.h b/tiledb/common/random/prng.h index d03f5ceacf1..849c89e63af 100644 --- a/tiledb/common/random/prng.h +++ b/tiledb/common/random/prng.h @@ -39,6 +39,80 @@ #include "tiledb/common/random/seeder.h" namespace tiledb::common { + +/** + * Marker class for a test-only PRNG constructor + */ +class RandomSeedT {}; +/** + * Marker constant + */ +static constexpr RandomSeedT RandomSeed; + +/** + * A random number generator suitable for both production and testing. + * + * @section Requirements + * + * This PRNG must support two very different kinds of situations: + * + * 1. In production use (the ordinary case) the seed must be _actually_ random + * so that the random sequences in different processes are distinct. + * 2. During most testing the seed must be deterministic to ensure that + * different test runs execute the same sequence of operations. This ensures + * that test failures can be replicated for diagnosis and correction. + * a. In particular, the seed in Catch2 test runs should be deterministic. + * 3. Certain tests, however, require actual randomness. + * a. One such test verifies that actual randomness is available per (1). Such + * tests necessarily have the possibility of failures, i.e. of false + * positives, but the actual likelihood can be made extremely low. + * b. Stress tests execute large number of test runs searching for defects. + * Such tests do not generate new information when run with previously- + * used PRNG sequences. + * + * This class satisfies these requirements with the following implementation + * choices: + * 1. If the user has not called `set_seed()` on the global seeder (from + * `Seeder::get`), then the seed is taken from `std::random_device`. + * 2. If the user has called `set_seed()` on the global seeder, that seed is + * used. + * 3. This class uses a global seeder in order to support Catch2. An event + * handler that executes at the start of the test run calls `set_seed()`. + * + * @section Maturity + * + * This class only has a default constructor. It does not have constructors that + * take seeds nor seeders. Such constructors would be useful for replicating + * test runs, but would also be premature at present. There's further test + * infrastructure required to replicate a specific test in isolation. As that + * test infrastructure matures, so also should this class. In the interim, in + * order to replicate a specific test with a specific seed, the function + * `initial_prng()` can be temporarily changed. + * + * This class uses a seeded PRNG to implement the random sequence. The + * requirement is that sequences in different processes be distinct, not that + * they be actually random. A randomly-seeded PRNG satisfies this requirement. + * The motivation for this implementation choice is as follows: + * 1. There is no standard hardware requirement for random number generation. + * While it's generally available, there are unknown variations in + * significant quality parameters such as the rate of random generation, + * duration of an RNG call, and randomness of generation (e.g. n-gram + * entropies). + * 2. In order not to stress a potentially inadequate RNG, we only call it for + * seeding and not for every number. + * 3. Qualifying a potential RNG implementation requires engineering resources + * that have not been committed as yet. + * + * @section Caveat + * + * This class uses `std::random_device` to seed the PRNG if no explicit seed is + * set. The standard library does not require that this class use an actual RNG, + * i.e. RNG from hardware of some kind. Indeed, certain earlier implementations + * did not do so and were deterministic. In order to validate that this device + * is actually random, it's necessary to run a multiprocess test to observe + * initialization in different processes. The test suite does not contain such + * a validation test at present. + */ class PRNG { public: /* ********************************* */ @@ -46,14 +120,25 @@ class PRNG { /* ********************************* */ /** - * Constructor. + * Default constructor. * - * Constructs an mt19937 engine for random number generation. - * If Seeder has been seeded, the seed will be set on the engine. - * Otherwise, it is default-constructed. + * If `Seeder` has been seeded, the seed will be set on the engine. Otherwise, + * the generator is constructed with a random seed. */ PRNG(); + /** + * Constructor for random seeding. + * + * This constructor makes an object that is always constructed with a random + * seed. + * + * @warning This constructor is only for testing. It must not be used in + * production code, where it would thwart the ability to run tests + * deterministically. + */ + PRNG(RandomSeedT); + /** Copy constructor is deleted. */ PRNG(const PRNG&) = delete; @@ -89,13 +174,6 @@ class PRNG { /** Mutex which protects against simultaneous access to operator() body. */ std::mutex mtx_; - - /* ********************************* */ - /* PRIVATE METHODS */ - /* ********************************* */ - - /** Default-constructs an mt19937 engine and optionally sets the seed. */ - std::mt19937_64 prng_initial(); }; } // namespace tiledb::common diff --git a/tiledb/common/random/seeder.h b/tiledb/common/random/seeder.h index e2807a146fb..d24efebd468 100644 --- a/tiledb/common/random/seeder.h +++ b/tiledb/common/random/seeder.h @@ -48,8 +48,10 @@ namespace tiledb::common { * default (set_seed) seed is set (seed) seed is used * but unused * - * Note that each transition may occur only once. - * i.e. A seed may only be set one time and may only be used one time. + * Note that each transition may occur only once, i.e. a seed may only be set + * one time and may only be used one time. This is an explicit design choice to + * ensure that a singleton PRNG is only initialized once, and to prevent the + * case where a seeming initialization is not the actual initialization. */ class Seeder { public: diff --git a/tiledb/common/random/test/unit_seedable_global_PRNG.cc b/tiledb/common/random/test/unit_seedable_global_PRNG.cc index b3d906207a8..3ec2c415399 100644 --- a/tiledb/common/random/test/unit_seedable_global_PRNG.cc +++ b/tiledb/common/random/test/unit_seedable_global_PRNG.cc @@ -96,6 +96,8 @@ TEST_CASE( "SeedableGlobalPRNG: operator", "[SeedableGlobalPRNG][operator][multiple]") { PRNG& prng = PRNG::get(); + // Verify that a second call succeeds. + CHECK_NOTHROW(PRNG::get()); auto rand_num1 = prng(); CHECK(rand_num1 != 0); @@ -112,7 +114,11 @@ TEST_CASE( TEST_CASE( "SeedableGlobalPRNG: Seeder singleton, errors", "[SeedableGlobalPRNG][Seeder][singleton][errors]") { - // Note: these errors will occur because PRNG sets and uses the singleton. + /* + * Retrieve a PRNG object explicitly. This will cause the PRNG to use the + * singleton seeder, after which subsequent calls should fail. + */ + [[maybe_unused]] auto& x{PRNG::get()}; Seeder& seeder_ = Seeder::get(); SECTION("try to set new seed after it's been set") { @@ -128,6 +134,16 @@ TEST_CASE( } } +/* + * Verify that randomly-seeded PRNG return different numbers. This is the best + * we can do within the ordinary way within a single-process test, the only kind + * readily available within Catch2. + */ +TEST_CASE("SeedableGlobalPRNG: Random seeding", "[SeedableGlobalPRNG]") { + PRNG x(RandomSeed), y(RandomSeed); + CHECK(x() != y()); +} + TEST_CASE("random_label", "[random_label]") { auto rand_label1 = random_label(); CHECK(rand_label1.length() == 32);