Skip to content

Commit

Permalink
Fix timestamp usage with random label.
Browse files Browse the repository at this point in the history
Random label uses the current time to know if a new label needs to use an incrementing counter so that labels generated in the same millisecond sort properly when reading back. Since the code at the higher level used a timestamp and the random label used another, it was possible to end up with timestamped names having the same millisecond, but not sorting properly. The fix is to have the random label code return the timestamp it used to the caller so that the caller can use it to generate timestamped names.

---
TYPE: NO_HISTORY
DESC: Fix timestamp usage with random label.
  • Loading branch information
KiterLuc committed Mar 26, 2024
1 parent 2029308 commit 8edff94
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 62 deletions.
21 changes: 7 additions & 14 deletions tiledb/common/random/random_label.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RandomLabelGenerator::RandomLabelGenerator()
/* ********************************* */
/* API */
/* ********************************* */
std::string RandomLabelGenerator::generate() {
RandomLabelWithTimestamp RandomLabelGenerator::generate() {
PRNG& prng = PRNG::get();
std::lock_guard<std::mutex> lock(mtx_);
auto now = tiledb::sm::utils::time::timestamp_now_ms();
Expand All @@ -69,26 +69,19 @@ std::string RandomLabelGenerator::generate() {
ss << std::hex << std::setw(8) << std::setfill('0')
<< static_cast<uint32_t>(prng());
ss << std::hex << std::setw(16) << std::setfill('0') << prng();
return ss.str();
return {ss.str(), now};
}

std::string RandomLabelGenerator::generate_random_label() {
RandomLabelWithTimestamp RandomLabelGenerator::generate_random_label() {
static RandomLabelGenerator generator;
return generator.generate();
}

/**
* Wrapper function for `generate_random_label`, which returns a PRNG-generated
* label as a 32-digit hexadecimal random number.
* (Ex. f258d22d4db9139204eef2b4b5d860cc).
*
* @pre If multiple labels are generated within the same millisecond, they will
* be sorted using a counter on the most significant 4 bytes.
* @note Labels may be 0-padded to ensure exactly a 128-bit, 32-digit length.
*
* @return A random label.
*/
std::string random_label() {
return RandomLabelGenerator::generate_random_label().random_label_;
}

RandomLabelWithTimestamp random_label_with_timestamp() {
return RandomLabelGenerator::generate_random_label();
}

Expand Down
39 changes: 36 additions & 3 deletions tiledb/common/random/random_label.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,26 @@ class RandomLabelException : public StatusException {
}
};

/** Simple struct to return a label with timestamp. */
struct RandomLabelWithTimestamp {
/**
* Construct a new random label with timestamp object
*
* @param random_label Random label.
* @param timestamp Timestamp the random label was created.
*/
RandomLabelWithTimestamp(std::string random_label, uint64_t timestamp)
: random_label_(random_label)
, timestamp_(timestamp) {
}

/** Random label. */
std::string random_label_;

/** Timestamp the label was created. */
uint64_t timestamp_;
};

/**
* Generates a pseudeo-random label, formatted as a 32-digit hexadecimal number.
* (Ex. f258d22d4db9139204eef2b4b5d860cc).
Expand All @@ -77,12 +97,12 @@ class RandomLabelGenerator {
/* ********************************* */
/* API */
/* ********************************* */
/** Generate a random label. */
std::string generate();
/** Generate a random label with a timestamp. */
RandomLabelWithTimestamp generate();

public:
/** Generate a random label. */
static std::string generate_random_label();
static RandomLabelWithTimestamp generate_random_label();

private:
/* ********************************* */
Expand Down Expand Up @@ -112,6 +132,19 @@ class RandomLabelGenerator {
*/
std::string random_label();

/**
* Wrapper function for `generate_random_label`, which returns a PRNG-generated
* label as a 32-digit hexadecimal random number.
* (Ex. f258d22d4db9139204eef2b4b5d860cc).
*
* @pre If multiple labels are generated within the same millisecond, they will
* be sorted using a counter on the most significant 4 bytes.
* @note Labels may be 0-padded to ensure exactly a 128-bit, 32-digit length.
*
* @return A random label with timestamp.
*/
RandomLabelWithTimestamp random_label_with_timestamp();

} // namespace tiledb::common

#endif // TILEDB_HELPERS_H
8 changes: 2 additions & 6 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ Status Array::open(
set_array_schemas_all(std::move(array_schemas.value()));

// Set the timestamp
opened_array_->metadata().reset(timestamp_for_new_component());
opened_array_->metadata().reset(timestamp_end_opened_at());
} else if (
query_type == QueryType::DELETE || query_type == QueryType::UPDATE) {
{
Expand Down Expand Up @@ -421,7 +421,7 @@ Status Array::open(
}

// Updates the timestamp to use for metadata.
opened_array_->metadata().reset(timestamp_for_new_component());
opened_array_->metadata().reset(timestamp_end_opened_at());
} else {
throw ArrayException("Cannot open array; Unsupported query type.");
}
Expand Down Expand Up @@ -1137,10 +1137,6 @@ bool Array::use_refactored_array_open() const {
return refactored_array_open || use_refactored_query_submit();
}

uint64_t Array::timestamp_for_new_component() const {
return new_component_timestamp_.value_or(utils::time::timestamp_now_ms());
}

bool Array::use_refactored_query_submit() const {
auto found = false;
auto refactored_query_submit = false;
Expand Down
8 changes: 0 additions & 8 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,14 +585,6 @@ class Array {
new_component_timestamp_.value_or(0);
}

/**
* Returns the timestamp to use when writing components (fragment,
* metadata, etc.)
*
* If set to use the lastest time, this will get the time when called.
*/
uint64_t timestamp_for_new_component() const;

/** Directly set the timestamp start value. */
inline void set_timestamp_start(uint64_t timestamp_start) {
array_dir_timestamp_start_ = timestamp_start;
Expand Down
34 changes: 15 additions & 19 deletions tiledb/sm/metadata/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ Metadata::Metadata(shared_ptr<MemoryTracker> memory_tracker)
: memory_tracker_(memory_tracker)
, metadata_map_(memory_tracker_->get_resource(MemoryType::METADATA))
, metadata_index_(memory_tracker_->get_resource(MemoryType::METADATA))
, timestamp_range_([]() -> std::pair<uint64_t, uint64_t> {
, loaded_metadata_uris_(memory_tracker_->get_resource(MemoryType::METADATA))
, timestamped_name_([]() -> std::string {
auto t = utils::time::timestamp_now_ms();
return std::make_pair(t, t);
}())
, loaded_metadata_uris_(
memory_tracker_->get_resource(MemoryType::METADATA)) {
return tiledb::storage_format::generate_timestamped_name(
t, t, std::nullopt);
}()) {
build_metadata_index();
}

Expand All @@ -79,7 +79,7 @@ Metadata& Metadata::operator=(Metadata& other) {
metadata_map_.emplace(k, v);
}

timestamp_range_ = other.timestamp_range_;
timestamped_name_ = other.timestamped_name_;

for (auto& uri : other.loaded_metadata_uris_) {
loaded_metadata_uris_.emplace_back(uri);
Expand All @@ -106,7 +106,6 @@ void Metadata::clear() {
metadata_map_.clear();
metadata_index_.clear();
loaded_metadata_uris_.clear();
timestamp_range_ = std::make_pair(0, 0);
uri_ = URI();
}

Expand All @@ -118,10 +117,8 @@ URI Metadata::get_uri(const URI& array_uri) {
}

void Metadata::generate_uri(const URI& array_uri) {
auto ts_name = tiledb::storage_format::generate_timestamped_name(
timestamp_range_.first, timestamp_range_.second, std::nullopt);
uri_ = array_uri.join_path(constants::array_metadata_dir_name)
.join_path(ts_name);
.join_path(timestamped_name_);
}

std::map<std::string, Metadata::MetadataValue> Metadata::deserialize(
Expand Down Expand Up @@ -189,10 +186,6 @@ void Metadata::serialize(Serializer& serializer) const {
}
}

const std::pair<uint64_t, uint64_t>& Metadata::timestamp_range() const {
return timestamp_range_;
}

void Metadata::del(const char* key) {
assert(key != nullptr);

Expand Down Expand Up @@ -317,8 +310,10 @@ void Metadata::set_loaded_metadata_uris(
loaded_metadata_uris_.push_back(uri.uri_);
}

timestamp_range_.first = loaded_metadata_uris.front().timestamp_range_.first;
timestamp_range_.second = loaded_metadata_uris.back().timestamp_range_.second;
timestamped_name_ = tiledb::storage_format::generate_timestamped_name(
loaded_metadata_uris.front().timestamp_range_.first,
loaded_metadata_uris.back().timestamp_range_.second,
std::nullopt);
}

const tdb::pmr::vector<URI>& Metadata::loaded_metadata_uris() const {
Expand All @@ -327,14 +322,15 @@ const tdb::pmr::vector<URI>& Metadata::loaded_metadata_uris() const {

void Metadata::reset(uint64_t timestamp) {
clear();
timestamp = (timestamp != 0) ? timestamp : utils::time::timestamp_now_ms();
timestamp_range_ = std::make_pair(timestamp, timestamp);
timestamped_name_ = tiledb::storage_format::generate_timestamped_name(
timestamp, timestamp, std::nullopt);
}

void Metadata::reset(
const uint64_t timestamp_start, const uint64_t timestamp_end) {
clear();
timestamp_range_ = std::make_pair(timestamp_start, timestamp_end);
timestamped_name_ = tiledb::storage_format::generate_timestamped_name(
timestamp_start, timestamp_end, std::nullopt);
}

Metadata::iterator Metadata::begin() const {
Expand Down
12 changes: 3 additions & 9 deletions tiledb/sm/metadata/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,6 @@ class Metadata {
/** Serializes all key-value metadata items into the input buffer. */
void serialize(Serializer& serializer) const;

/** Returns the timestamp range. */
const std::pair<uint64_t, uint64_t>& timestamp_range() const;

/**
* Deletes a metadata item.
*
Expand Down Expand Up @@ -257,12 +254,6 @@ class Metadata {
/** Mutex for thread-safety. */
mutable std::mutex mtx_;

/**
* The timestamp range covered by the metadata that was read or written.
* This is used to determine the metadata file name.
*/
std::pair<uint64_t, uint64_t> timestamp_range_;

/**
* The URIs of the metadata files that have been loaded to this object.
* This is needed to know which files to delete upon consolidation.
Expand All @@ -272,6 +263,9 @@ class Metadata {
/** The URI of the array metadata file. */
URI uri_;

/** Timestamped name. */
std::string timestamped_name_;

/* ********************************* */
/* PRIVATE METHODS */
/* ********************************* */
Expand Down
9 changes: 6 additions & 3 deletions tiledb/storage_format/uri/generate_uri.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ std::string generate_timestamped_name(
"start timestamp cannot be after end timestamp.");
}

auto lbl = random_label_with_timestamp();
if (timestamp_start == 0 && timestamp_end == 0) {
timestamp_start = timestamp_end = lbl.timestamp_;
}

std::stringstream ss;
ss << "/__" << timestamp_start << "_" << timestamp_end << "_"
<< random_label();
<< lbl.random_label_;

if (version.has_value()) {
ss << "_" << version.value();
Expand All @@ -61,8 +66,6 @@ std::string generate_timestamped_name(

std::string generate_timestamped_name(
uint64_t timestamp, format_version_t format_version) {
timestamp =
(timestamp != 0) ? timestamp : sm::utils::time::timestamp_now_ms();
return generate_timestamped_name(timestamp, timestamp, format_version);
}

Expand Down

0 comments on commit 8edff94

Please sign in to comment.