Commit

Merge remote-tracking branch 'upstream/main'
matthewvon committed Oct 30, 2024
2 parents 5b0834a + 8109046 commit 6c97426
Showing 159 changed files with 5,841 additions and 2,097 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -32,7 +32,7 @@
# 3. cmake ..
# 4. make -j

cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.12)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules/")
include(ReadVersion)
22 changes: 22 additions & 0 deletions HISTORY.md
@@ -1,6 +1,28 @@
# Rocksdb Change Log
> NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt`
## 9.8.0 (10/25/2024)
### New Features
* All non-`block_cache` options in `BlockBasedTableOptions` are now mutable with `DB::SetOptions()`. See also Bug Fixes below.
* When using iterators with BlobDB, it is now possible to load large values on an on-demand basis, i.e. only if they are actually needed by the application. This can save I/O in use cases where the values associated with certain keys are not needed. For more details, see the new read option `allow_unprepared_value` and the iterator API `PrepareValue`.
* Add a new file ingestion option `IngestExternalFileOptions::fill_cache` to support not adding blocks from ingested files into block cache during file ingestion.
* The option `allow_unprepared_value` is now also supported for multi-column-family iterators (i.e. `CoalescingIterator` and `AttributeGroupIterator`).
* When a file with just one range deletion (standalone range deletion file) is ingested via bulk loading, it will be marked for compaction. During compaction, files of this type can be used to directly filter out some input files that are not protected by any snapshot and are completely deleted by the standalone range deletion file.

### Behavior Changes
* During file ingestion, level assignment for overlapping files is done in multiple batches, so that they can potentially be assigned to lower levels instead of always landing on L0.
* The OPTIONS file to be loaded by a remote worker is now preserved so that it does not get purged by the primary host. This uses a technique similar to the one that preserves new SST files from getting purged: `min_options_file_numbers_` is tracked the same way `pending_outputs_` is tracked.
* Trim readahead_size during scans so data blocks containing keys that are not in the same prefix as the seek key in `Seek()` are not prefetched when `ReadOptions::auto_readahead_size=true` (default value) and `ReadOptions::prefix_same_as_start = true`
* Assigning levels for external files is now done in the same way for universal compaction and leveled compaction. The old behavior tended to assign files to L0, while the new behavior assigns the files to the lowest level possible.

### Bug Fixes
* Fix a longstanding race condition in SetOptions for `block_based_table_factory` options. The fix has some subtle behavior changes because of copying and replacing the TableFactory on a change with SetOptions, including requiring an Iterator::Refresh() for an existing Iterator to use the latest options.
* Fix undercounting of allocated memory in the compressed secondary cache due to looking at the compressed block size rather than the actual memory allocated, which could be larger due to internal fragmentation.
* `GetApproximateMemTableStats()` could return disastrously bad estimates 5-25% of the time. The function has been re-engineered to return much better estimates with similar CPU cost.
* Skip insertion of compressed blocks in the secondary cache if the `lowest_used_cache_tier` DB option is `kVolatileTier`.
* Fix an issue in level compaction where a small CF with small compaction debt can cause the DB to allow parallel compactions. (#13054)
* Several DB option settings could be lost through `GetOptionsFromString()`, possibly elsewhere as well. Affected options, now fixed: `background_close_inactive_wals`, `write_dbid_to_manifest`, `write_identity_file`, `prefix_seek_opt_in_only`

## 9.7.0 (09/20/2024)
### New Features
* Make Cache a customizable class that can be instantiated by the object registry.
5 changes: 2 additions & 3 deletions buckifier/buckify_rocksdb.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

try:
from builtins import str
@@ -132,7 +131,7 @@ def generate_targets(repo_path, deps_map):
if len(sys.argv) >= 2:
# Heuristically quote and canonicalize whitespace for inclusion
# in how the file was generated.
extra_argv = " '{0}'".format(" ".join(sys.argv[1].split()))
extra_argv = " '{}'".format(" ".join(sys.argv[1].split()))

TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv)
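The quoting change above drops the explicit positional index from `str.format`; behavior is unchanged. A minimal standalone sketch of the whitespace-canonicalization idiom it uses (the helper name here is illustrative, not part of buckify_rocksdb.py):

```python
def canonicalize_argv(arg):
    # split() with no arguments collapses runs of spaces/tabs/newlines,
    # so the generated-by comment has stable, single-space formatting.
    return " '{}'".format(" ".join(arg.split()))

print(canonicalize_argv("--extra   deps\tfoo"))
```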

@@ -213,7 +212,7 @@ def generate_targets(repo_path, deps_map):
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0]
TARGETS.add_binary(name, [src], [], extra_bench_libs=True)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
print(f"Extra dependencies:\n{json.dumps(deps_map)}")

# Dictionary test executable name -> relative source file path
test_source_map = {}
1 change: 0 additions & 1 deletion buckifier/targets_builder.py
@@ -1,5 +1,4 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

try:
from builtins import object, str
1 change: 0 additions & 1 deletion buckifier/targets_cfg.py
@@ -1,5 +1,4 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

rocksdb_target_header_template = """# This file \100generated by:
#$ python3 buckifier/buckify_rocksdb.py{extra_argv}
1 change: 0 additions & 1 deletion buckifier/util.py
@@ -2,7 +2,6 @@
"""
This module keeps commonly used components.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

try:
from builtins import object
9 changes: 4 additions & 5 deletions build_tools/amalgamate.py
@@ -25,7 +25,6 @@
#
# The solution is to move the include out of the #ifdef.

from __future__ import print_function

import argparse
import re
@@ -62,7 +61,7 @@ def expand_include(

included.add(include_path)
with open(include_path) as f:
print('#line 1 "{}"'.format(include_path), file=source_out)
print(f'#line 1 "{include_path}"', file=source_out)
process_file(
f, include_path, source_out, header_out, include_paths, public_include_paths
)
@@ -118,7 +117,7 @@ def process_file(
)

if expanded:
print('#line {} "{}"'.format(line + 1, abs_path), file=source_out)
print(f'#line {line + 1} "{abs_path}"', file=source_out)
elif text != "#pragma once\n":
source_out.write(text)

@@ -157,8 +156,8 @@ def main():
with open(filename) as f, open(args.source_out, "w") as source_out, open(
args.header_out, "w"
) as header_out:
print('#line 1 "{}"'.format(filename), file=source_out)
print('#include "{}"'.format(header_out.name), file=source_out)
print(f'#line 1 "{filename}"', file=source_out)
print(f'#include "{header_out.name}"', file=source_out)
process_file(
f, abs_path, source_out, header_out, include_paths, public_include_paths
)
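The f-string conversions in this file emit C preprocessor `#line` directives so that compiler diagnostics in the amalgamated source point back at the original files. A small self-contained sketch of that idiom (function name and path are illustrative, not amalgamate.py's API):

```python
import io

def emit_line_directive(line_no, path, out):
    # Tells the C/C++ compiler to report subsequent lines as coming
    # from `path`, starting at `line_no`.
    print(f'#line {line_no} "{path}"', file=out)

buf = io.StringIO()
emit_line_directive(1, "include/rocksdb/db.h", buf)
```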
4 changes: 2 additions & 2 deletions build_tools/benchmark_log_tool.py
@@ -102,7 +102,7 @@ def conform_opensearch(row):


class ResultParser:
def __init__(self, field="(\w|[+-:.%])+", intrafield="(\s)+", separator="\t"):
def __init__(self, field=r"(\w|[+-:.%])+", intrafield=r"(\s)+", separator="\t"):
self.field = re.compile(field)
self.intra = re.compile(intrafield)
self.sep = re.compile(separator)
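The raw-string prefixes added above silence Python's invalid-escape-sequence warnings without changing the compiled patterns. A standalone sketch of how such field/separator regexes split a tab-separated benchmark line (simplified; not the real `ResultParser`):

```python
import re

field = re.compile(r"(\w|[+-:.%])+")  # a benchmark field: word chars plus +-:.% range
sep = re.compile(r"\t")               # fields are tab-separated

def split_fields(line):
    # Keep only chunks that fully match the field pattern.
    return [part for part in sep.split(line.strip()) if field.fullmatch(part)]

print(split_fields("fillseq\t50.25\tops/sec"))
```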
@@ -159,7 +159,7 @@ def parse(self, lines):


def load_report_from_tsv(filename: str):
file = open(filename, "r")
file = open(filename)
contents = file.readlines()
file.close()
parser = ResultParser()
21 changes: 10 additions & 11 deletions build_tools/error_filter.py
@@ -9,7 +9,6 @@
- Prints those error messages to stdout
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import re
import sys
@@ -43,7 +42,7 @@ def parse_error(self, line):
return None
gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
if gtest_fail_match:
return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1))
return "{} failed: {}".format(self._last_gtest_name, gtest_fail_match.group(1))
return None


@@ -66,52 +65,52 @@ def __init__(self):
# format (link error):
# '<filename>:<line #>: error: <error msg>'
# The below regex catches both
super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:")
super().__init__(r"\S+:\d+: error:")


class ScanBuildErrorParser(MatchErrorParser):
def __init__(self):
super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$")
super().__init__(r"scan-build: \d+ bugs found.$")


class DbCrashErrorParser(MatchErrorParser):
def __init__(self):
super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.")
super().__init__(r"\*\*\*.*\^$|TEST FAILED.")


class WriteStressErrorParser(MatchErrorParser):
def __init__(self):
super(WriteStressErrorParser, self).__init__(
super().__init__(
r"ERROR: write_stress died with exitcode=\d+"
)


class AsanErrorParser(MatchErrorParser):
def __init__(self):
super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:")
super().__init__(r"==\d+==ERROR: AddressSanitizer:")


class UbsanErrorParser(MatchErrorParser):
def __init__(self):
# format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:")
super().__init__(r"\S+:\d+:\d+: runtime error:")


class ValgrindErrorParser(MatchErrorParser):
def __init__(self):
# just grab the summary, valgrind doesn't clearly distinguish errors
# from other log messages.
super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:")
super().__init__(r"==\d+== ERROR SUMMARY:")


class CompatErrorParser(MatchErrorParser):
def __init__(self):
super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$")
super().__init__(r"==== .*[Ee]rror.* ====$")


class TsanErrorParser(MatchErrorParser):
def __init__(self):
super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:")
super().__init__(r"WARNING: ThreadSanitizer:")


_TEST_NAME_TO_PARSERS = {
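The `super()` calls above drop the redundant Python 2-era class/instance arguments; behavior is unchanged. A standalone sketch of the match-based error detection these parser classes implement (the AddressSanitizer pattern is copied from the diff; the class here is a simplified stand-in for the real `MatchErrorParser`):

```python
import re

class MatchErrorParser:
    # Reports a line as an error iff it matches the given pattern.
    def __init__(self, pattern):
        self._pattern = re.compile(pattern)

    def parse_error(self, line):
        return line if self._pattern.match(line) else None

asan = MatchErrorParser(r"==\d+==ERROR: AddressSanitizer:")
print(asan.parse_error("==1234==ERROR: AddressSanitizer: heap-use-after-free"))
```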
4 changes: 3 additions & 1 deletion cache/cache.cc
@@ -133,7 +133,9 @@ Status Cache::CreateFromString(const ConfigOptions& config_options,
std::shared_ptr<Cache>* result) {
Status status;
std::shared_ptr<Cache> cache;
if (value.find("://") == std::string::npos) {
if (StartsWith(value, "null")) {
cache = nullptr;
} else if (value.find("://") == std::string::npos) {
if (value.find('=') == std::string::npos) {
cache = NewLRUCache(ParseSizeT(value));
} else {
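The new `StartsWith(value, "null")` branch lets an option string explicitly request no cache. A hedged Python re-sketch of the dispatch order in `Cache::CreateFromString` (illustrative only; the real code is C++, returns a `Status`, and delegates URI and key=value forms to other helpers):

```python
def create_cache_from_string(value):
    # Branch order mirrors the C++: explicit "null", then URI-style values
    # handled by the object registry, then a bare size, then key=value options.
    if value.startswith("null"):
        return None
    if "://" in value:
        return ("object_registry", value)  # hypothetical tag for registry lookup
    if "=" not in value:
        return ("lru_cache", int(value))   # corresponds to NewLRUCache(ParseSizeT(value))
    return ("lru_cache_opts", value)

print(create_cache_from_string("null"))
```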
7 changes: 3 additions & 4 deletions coverage/parse_gcov_output.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

from __future__ import print_function

import optparse
import re
@@ -109,11 +108,11 @@ def report_coverage():

# Check if we need to display coverage info for interested files.
if len(interested_files):
per_file_coverage = dict(
(fname, per_file_coverage[fname])
per_file_coverage = {
fname: per_file_coverage[fname]
for fname in interested_files
if fname in per_file_coverage
)
}
# If we only interested in several files, it makes no sense to report
# the total_coverage
total_coverage = None
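The dict comprehension above replaces the older `dict()`-over-generator spelling with identical behavior. A self-contained sketch of the filtering it performs (the sample coverage data is made up):

```python
per_file_coverage = {"db/db_impl.cc": 0.91, "util/coding.cc": 0.84, "cache/lru_cache.cc": 0.77}
interested_files = ["db/db_impl.cc", "cache/lru_cache.cc", "not/in/report.cc"]

# Keep only the files the caller asked about that actually appear in the report.
filtered = {
    fname: per_file_coverage[fname]
    for fname in interested_files
    if fname in per_file_coverage
}
print(filtered)
```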
2 changes: 2 additions & 0 deletions db/arena_wrapped_db_iter.h
@@ -83,6 +83,8 @@ class ArenaWrappedDBIter : public Iterator {
Status Refresh() override;
Status Refresh(const Snapshot*) override;

bool PrepareValue() override { return db_iter_->PrepareValue(); }

void Init(Env* env, const ReadOptions& read_options,
const ImmutableOptions& ioptions,
const MutableCFOptions& mutable_cf_options, const Version* version,
39 changes: 32 additions & 7 deletions db/attribute_group_iterator_impl.h
@@ -13,14 +13,11 @@ namespace ROCKSDB_NAMESPACE {
class AttributeGroupIteratorImpl : public AttributeGroupIterator {
public:
AttributeGroupIteratorImpl(
const Comparator* comparator,
const Comparator* comparator, bool allow_unprepared_value,
const std::vector<ColumnFamilyHandle*>& column_families,
const std::vector<Iterator*>& child_iterators)
: impl_(
comparator, column_families, child_iterators, [this]() { Reset(); },
[this](const autovector<MultiCfIteratorInfo>& items) {
AddToAttributeGroups(items);
}) {}
: impl_(comparator, allow_unprepared_value, column_families,
child_iterators, ResetFunc(this), PopulateFunc(this)) {}
~AttributeGroupIteratorImpl() override {}

// No copy allowed
@@ -45,8 +42,36 @@ class AttributeGroupIteratorImpl : public AttributeGroupIterator {

void Reset() { attribute_groups_.clear(); }

bool PrepareValue() override { return impl_.PrepareValue(); }

private:
MultiCfIteratorImpl impl_;
class ResetFunc {
public:
explicit ResetFunc(AttributeGroupIteratorImpl* iter) : iter_(iter) {}

void operator()() const {
assert(iter_);
iter_->Reset();
}

private:
AttributeGroupIteratorImpl* iter_;
};

class PopulateFunc {
public:
explicit PopulateFunc(AttributeGroupIteratorImpl* iter) : iter_(iter) {}

void operator()(const autovector<MultiCfIteratorInfo>& items) const {
assert(iter_);
iter_->AddToAttributeGroups(items);
}

private:
AttributeGroupIteratorImpl* iter_;
};

MultiCfIteratorImpl<ResetFunc, PopulateFunc> impl_;
IteratorAttributeGroups attribute_groups_;
void AddToAttributeGroups(const autovector<MultiCfIteratorInfo>& items);
};
13 changes: 7 additions & 6 deletions db/blob/blob_source.cc
@@ -20,23 +20,24 @@

namespace ROCKSDB_NAMESPACE {

BlobSource::BlobSource(const ImmutableOptions* immutable_options,
BlobSource::BlobSource(const ImmutableOptions& immutable_options,
const MutableCFOptions& mutable_cf_options,
const std::string& db_id,
const std::string& db_session_id,
BlobFileCache* blob_file_cache)
: db_id_(db_id),
db_session_id_(db_session_id),
statistics_(immutable_options->statistics.get()),
statistics_(immutable_options.statistics.get()),
blob_file_cache_(blob_file_cache),
blob_cache_(immutable_options->blob_cache),
lowest_used_cache_tier_(immutable_options->lowest_used_cache_tier) {
blob_cache_(immutable_options.blob_cache),
lowest_used_cache_tier_(immutable_options.lowest_used_cache_tier) {
auto bbto =
immutable_options->table_factory->GetOptions<BlockBasedTableOptions>();
mutable_cf_options.table_factory->GetOptions<BlockBasedTableOptions>();
if (bbto &&
bbto->cache_usage_options.options_overrides.at(CacheEntryRole::kBlobCache)
.charged == CacheEntryRoleOptions::Decision::kEnabled) {
blob_cache_ = SharedCacheInterface{std::make_shared<ChargedCache>(
immutable_options->blob_cache, bbto->block_cache)};
immutable_options.blob_cache, bbto->block_cache)};
}
}

6 changes: 5 additions & 1 deletion db/blob/blob_source.h
@@ -21,6 +21,7 @@
namespace ROCKSDB_NAMESPACE {

struct ImmutableOptions;
struct MutableCFOptions;
class Status;
class FilePrefetchBuffer;
class Slice;
@@ -31,7 +32,10 @@ class Slice;
// storage with minimal cost.
class BlobSource {
public:
BlobSource(const ImmutableOptions* immutable_options,
// NOTE: db_id, db_session_id, and blob_file_cache are saved by reference or
// pointer.
BlobSource(const ImmutableOptions& immutable_options,
const MutableCFOptions& mutable_cf_options,
const std::string& db_id, const std::string& db_session_id,
BlobFileCache* blob_file_cache);
