diff --git a/DEFAULT_OPTIONS_HISTORY.md b/DEFAULT_OPTIONS_HISTORY.md index 26280ee34de..82c64d5235c 100644 --- a/DEFAULT_OPTIONS_HISTORY.md +++ b/DEFAULT_OPTIONS_HISTORY.md @@ -1,4 +1,4 @@ -# RocksDB default options change log +# RocksDB default options change log (NO LONGER MAINTAINED) ## Unreleased * delayed_write_rate takes the rate given by rate_limiter if not specified. diff --git a/HISTORY.md b/HISTORY.md index 7bcad2b84ae..7ae9bbbafcc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,11 +4,15 @@ * Added values to `TraceFilterType`: `kTraceFilterIteratorSeek`, `kTraceFilterIteratorSeekForPrev`, and `kTraceFilterMultiGet`. They can be set in `TraceOptions` to filter out the operation types after which they are named. * Added `TraceOptions::preserve_write_order`. When enabled it guarantees write records are traced in the same order they are logged to WAL and applied to the DB. By default it is disabled (false) to match the legacy behavior and prevent regression. * Made the Env class extend the Customizable class. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. +* `Options::OldDefaults` is marked deprecated, as it is no longer maintained. * Add ObjectLibrary::AddFactory and ObjectLibrary::PatternEntry classes. This method and associated class are the preferred mechanism for registering factories with the ObjectLibrary going forward. The ObjectLibrary::Register method, which uses regular expressions and may be problematic, is deprecated and will be in a future release. ### Behavior Changes * `DB::DestroyColumnFamilyHandle()` will return Status::InvalidArgument() if called with `DB::DefaultColumnFamily()`. +### New Features +* Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. 
(Default options are gradually moving toward accepting some performance overhead in exchange for extra correctness checking.) + ### Bug Fixes * Fix a bug that FlushMemTable may return ok even flush not succeed. diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 130b16fda2c..26d02b0f120 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -478,6 +478,8 @@ Options DBTestBase::GetOptions( break; case kXXH3Checksum: { table_options.checksum = kXXH3; + // Thrown in here for basic coverage: + options.DisableExtraChecks(); break; } case kFIFOCompaction: { diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 1ee09bb4b91..fbabcd771a0 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -736,7 +736,9 @@ struct AdvancedColumnFamilyOptions { // LSM changes (Flush, Compaction, AddFile). When this option is true, these // checks are also enabled in release mode. These checks were historically // disabled in release mode, but are now enabled by default for proactive - // corruption detection, at almost no cost in extra CPU. + // corruption detection. The CPU overhead is negligible for normal mixed + // operations but can slow down saturated writing. See + // Options::DisableExtraChecks(). // Default: true bool force_consistency_checks = true; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0b406db3dbc..e3c7d8ead97 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1365,7 +1365,11 @@ struct Options : public DBOptions, public ColumnFamilyOptions { const ColumnFamilyOptions& column_family_options) : DBOptions(db_options), ColumnFamilyOptions(column_family_options) {} - // The function recovers options to the option as in version 4.6. + // Change to some default settings from an older version. + // NOT MAINTAINED: This function has not been and is not maintained. + // DEPRECATED: This function might be removed in a future release. 
+ // In general, defaults are changed to suit broad interests. Opting + // out of a change on upgrade should be deliberate and considered. Options* OldDefaults(int rocksdb_major_version = 4, int rocksdb_minor_version = 6); @@ -1388,6 +1392,12 @@ struct Options : public DBOptions, public ColumnFamilyOptions { // Use this if your DB is very small (like under 1GB) and you don't want to // spend lots of memory for memtables. Options* OptimizeForSmallDb(); + + // Disable some checks that should not be necessary in the absence of + // software logic errors or CPU+memory hardware errors. This can improve + // write speeds but is only recommended for temporary use. Does not + // change protection against corrupt storage (e.g. verify_checksums). + Options* DisableExtraChecks(); }; // diff --git a/options/options.cc b/options/options.cc index 969bc31a854..a64e1e7b9c5 100644 --- a/options/options.cc +++ b/options/options.cc @@ -474,6 +474,19 @@ Options* Options::OptimizeForSmallDb() { return this; } +Options* Options::DisableExtraChecks() { + // See https://github.com/facebook/rocksdb/issues/9354 + force_consistency_checks = false; + // Considered but no clear performance impact seen: + // * check_flush_compaction_key_order + // * paranoid_checks + // * flush_verify_memtable_count + // By current API contract, not including + // * verify_checksums + // because checking storage data integrity is a more standard practice. 
+ return this; +} + Options* Options::OldDefaults(int rocksdb_major_version, int rocksdb_minor_version) { ColumnFamilyOptions::OldDefaults(rocksdb_major_version, diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 4909c52a57d..aa4469c18b3 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -837,11 +837,25 @@ DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/" "deletepercent), so deletepercent must be smaller than (100 - " "FLAGS_readwritepercent)"); -DEFINE_bool(optimize_filters_for_hits, false, +DEFINE_bool(optimize_filters_for_hits, + ROCKSDB_NAMESPACE::Options().optimize_filters_for_hits, "Optimizes bloom filters for workloads for most lookups return " "a value. For now this doesn't create bloom filters for the max " "level of the LSM to reduce metadata that should fit in RAM. "); +DEFINE_bool(paranoid_checks, ROCKSDB_NAMESPACE::Options().paranoid_checks, + "RocksDB will aggressively check consistency of the data."); + +DEFINE_bool(force_consistency_checks, + ROCKSDB_NAMESPACE::Options().force_consistency_checks, + "Runs consistency checks on the LSM every time a change is " + "applied."); + +DEFINE_bool(check_flush_compaction_key_order, + ROCKSDB_NAMESPACE::Options().check_flush_compaction_key_order, + "During flush or compaction, check whether keys inserted to " + "output files are in order."); + DEFINE_uint64(delete_obsolete_files_period_micros, 0, "Ignored. 
Left here for backward compatibility"); @@ -4304,6 +4318,10 @@ class Benchmark { options.max_compaction_bytes = FLAGS_max_compaction_bytes; options.disable_auto_compactions = FLAGS_disable_auto_compactions; options.optimize_filters_for_hits = FLAGS_optimize_filters_for_hits; + options.paranoid_checks = FLAGS_paranoid_checks; + options.force_consistency_checks = FLAGS_force_consistency_checks; + options.check_flush_compaction_key_order = + FLAGS_check_flush_compaction_key_order; options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds; options.ttl = FLAGS_ttl_seconds; // fill storage options