-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: a new strategy for auto compaction (ospp 2024) #2816
Changes from 2 commits
af1994c
9d1044b
2b59df0
1d5dfde
3521772
5a0c544
5a94fd9
bc8de13
e5a4ff5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -658,3 +658,37 @@ internal-used-unfinished-full-sync : | |||||||||||||||||
# https://github.com/OpenAtomFoundation/pika/issues/2886 | ||||||||||||||||||
# default value: true | ||||||||||||||||||
wash-data: true | ||||||||||||||||||
|
||||||||||||||||||
# Pika automatic compact strategy, a complement to rocksdb compact. | ||||||||||||||||||
# Trigger the compact background task periodically according to `compact-interval` | ||||||||||||||||||
# Can choose `full-compact` or `obd-compact`. | ||||||||||||||||||
# obd-compact https://github.com/OpenAtomFoundation/pika/issues/2255 | ||||||||||||||||||
compaction-strategy : obd-compact | ||||||||||||||||||
|
||||||||||||||||||
# For OBD_Compact | ||||||||||||||||||
# According to the number of sst files in rocksdb, | ||||||||||||||||||
# compact every `compact-every-num-of-files` file. | ||||||||||||||||||
compact-every-num-of-files : 10 | ||||||||||||||||||
|
||||||||||||||||||
# For OBD_Compact | ||||||||||||||||||
# In another search, if the file creation time is | ||||||||||||||||||
# greater than `force-compact-file-age-seconds`, | ||||||||||||||||||
# a compaction of the upper and lower boundaries | ||||||||||||||||||
# of the file will be performed at the same time | ||||||||||||||||||
# `compact-every-num-of-files` -1 | ||||||||||||||||||
force-compact-file-age-seconds : 300 | ||||||||||||||||||
|
||||||||||||||||||
# For OBD_Compact | ||||||||||||||||||
# Minimum delete ratio a file must reach before | ||||||||||||||||||
# it becomes eligible for forced compaction. | ||||||||||||||||||
force-compact-min-delete-ratio : 10 | ||||||||||||||||||
|
||||||||||||||||||
# For OBD_Compact | ||||||||||||||||||
# Skip sst files that were created less than | ||||||||||||||||||
# `dont-compact-sst-created-in-seconds` seconds ago. | ||||||||||||||||||
dont-compact-sst-created-in-seconds : 20 | ||||||||||||||||||
|
||||||||||||||||||
# For OBD_Compact | ||||||||||||||||||
# If the delete ratio of a file exceeds this threshold, | ||||||||||||||||||
# consider the file as a candidate for compaction. | ||||||||||||||||||
best-delete-min-ratio : 10 | ||||||||||||||||||
Comment on lines
+691
to
+694
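There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update the comment for the `force-compact-min-delete-ratio` parameter. The current comment for the `force-compact-min-delete-ratio` parameter is incorrect. It mentions triggering compaction based on the number of SST files, which is not related to the delete ratio. Please update the comment to accurately describe the purpose of the delete ratio threshold for forcing compaction. Consider updating the comment as follows: -# According to the number of sst files in rocksdb,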
-# compact every `compact-every-num-of-files` file.
+# If the delete ratio of a file exceeds this threshold,
+# consider the file as a candidate for compaction. Committable suggestion
Suggested change
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -516,6 +516,9 @@ Status PikaServer::DoSameThingEveryDB(const TaskType& type) { | |||||||||||||
case TaskType::kCompactAll: | ||||||||||||||
db_item.second->Compact(storage::DataType::kAll); | ||||||||||||||
break; | ||||||||||||||
case TaskType::kCompactOldestOrBestDeleteRatioSst: | ||||||||||||||
db_item.second->LongestNotCompactiontSstCompact(storage::DataType::kAll); | ||||||||||||||
break; | ||||||||||||||
default: | ||||||||||||||
break; | ||||||||||||||
} | ||||||||||||||
|
@@ -1220,6 +1223,12 @@ void PikaServer::AutoCompactRange() { | |||||||||||||
} | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
if (g_pika_conf->compaction_strategy() == PikaConf::FullCompact) { | ||||||||||||||
DoSameThingEveryDB(TaskType::kCompactAll); | ||||||||||||||
} else if (g_pika_conf->compaction_strategy() == PikaConf::OldestOrBestDeleteRatioSstCompact) { | ||||||||||||||
DoSameThingEveryDB(TaskType::kCompactOldestOrBestDeleteRatioSst); | ||||||||||||||
} | ||||||||||||||
Comment on lines
+1227
to
+1231
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a default case to handle unexpected compaction strategies Currently, |
||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void PikaServer::AutoBinlogPurge() { DoSameThingEveryDB(TaskType::kPurgeLog); } | ||||||||||||||
|
@@ -1430,6 +1439,12 @@ void PikaServer::InitStorageOptions() { | |||||||||||||
storage_options_.options.max_bytes_for_level_base = g_pika_conf->level0_file_num_compaction_trigger() * g_pika_conf->write_buffer_size(); | ||||||||||||||
storage_options_.options.max_subcompactions = g_pika_conf->max_subcompactions(); | ||||||||||||||
storage_options_.options.target_file_size_base = g_pika_conf->target_file_size_base(); | ||||||||||||||
storage_options_.options.level0_file_num_compaction_trigger = g_pika_conf->level0_file_num_compaction_trigger(); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这些参数设置上边已经有了,1435行 |
||||||||||||||
storage_options_.options.level0_stop_writes_trigger = g_pika_conf->level0_stop_writes_trigger(); | ||||||||||||||
storage_options_.options.level0_slowdown_writes_trigger = g_pika_conf->level0_slowdown_writes_trigger(); | ||||||||||||||
storage_options_.options.min_write_buffer_number_to_merge = g_pika_conf->min_write_buffer_number_to_merge(); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这些参数1435行已经有了,可以删掉。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||||||||||
storage_options_.options.max_bytes_for_level_base = g_pika_conf->level0_file_num_compaction_trigger() * g_pika_conf->write_buffer_size(); | ||||||||||||||
storage_options_.options.max_subcompactions = g_pika_conf->max_subcompactions(); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove duplicate configuration assignments to avoid redundancy. The assignments to the storage options from lines 1442 to 1447 are duplicates of previous assignments in the same method. This can lead to confusion and maintenance issues. Apply this diff to remove the duplicated lines: - storage_options_.options.level0_file_num_compaction_trigger = g_pika_conf->level0_file_num_compaction_trigger();
- storage_options_.options.level0_stop_writes_trigger = g_pika_conf->level0_stop_writes_trigger();
- storage_options_.options.level0_slowdown_writes_trigger = g_pika_conf->level0_slowdown_writes_trigger();
- storage_options_.options.min_write_buffer_number_to_merge = g_pika_conf->min_write_buffer_number_to_merge();
- storage_options_.options.max_bytes_for_level_base = g_pika_conf->level0_file_num_compaction_trigger() * g_pika_conf->write_buffer_size();
- storage_options_.options.max_subcompactions = g_pika_conf->max_subcompactions(); 📝 Committable suggestion
Suggested change
|
||||||||||||||
storage_options_.options.max_compaction_bytes = g_pika_conf->max_compaction_bytes(); | ||||||||||||||
storage_options_.options.max_background_flushes = g_pika_conf->max_background_flushes(); | ||||||||||||||
storage_options_.options.max_background_compactions = g_pika_conf->max_background_compactions(); | ||||||||||||||
|
@@ -1483,6 +1498,13 @@ void PikaServer::InitStorageOptions() { | |||||||||||||
storage_options_.statistics_max_size = g_pika_conf->max_cache_statistic_keys(); | ||||||||||||||
storage_options_.small_compaction_threshold = g_pika_conf->small_compaction_threshold(); | ||||||||||||||
|
||||||||||||||
// For Storage compaction | ||||||||||||||
storage_options_.compact_param_.best_delete_min_ratio_ = g_pika_conf->best_delete_min_ratio(); | ||||||||||||||
storage_options_.compact_param_.dont_compact_sst_created_in_seconds_ = g_pika_conf->dont_compact_sst_created_in_seconds(); | ||||||||||||||
storage_options_.compact_param_.force_compact_file_age_seconds_ = g_pika_conf->force_compact_file_age_seconds(); | ||||||||||||||
storage_options_.compact_param_.force_compact_min_delete_ratio_ = g_pika_conf->force_compact_min_delete_ratio(); | ||||||||||||||
storage_options_.compact_param_.compact_every_num_of_files_ = g_pika_conf->compact_every_num_of_files(); | ||||||||||||||
|
||||||||||||||
Comment on lines
+1495
to
+1501
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Consider refactoring The |
||||||||||||||
// rocksdb blob | ||||||||||||||
if (g_pika_conf->enable_blob_files()) { | ||||||||||||||
storage_options_.options.enable_blob_files = g_pika_conf->enable_blob_files(); | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,6 +74,15 @@ struct StorageOptions { | |
bool enable_db_statistics = false; | ||
size_t small_compaction_threshold = 5000; | ||
size_t small_compaction_duration_threshold = 10000; | ||
struct CompactParam { | ||
// for LongestNotCompactiontSstCompact function | ||
int compact_every_num_of_files_; | ||
int force_compact_file_age_seconds_; | ||
int force_compact_min_delete_ratio_; | ||
int dont_compact_sst_created_in_seconds_; | ||
int best_delete_min_ratio_; | ||
}; | ||
CompactParam compact_param_; | ||
Status ResetOptions(const OptionType& option_type, const std::unordered_map<std::string, std::string>& options_map); | ||
}; | ||
|
||
|
@@ -156,7 +165,8 @@ enum BitOpType { kBitOpAnd = 1, kBitOpOr, kBitOpXor, kBitOpNot, kBitOpDefault }; | |
enum Operation { | ||
kNone = 0, | ||
kCleanAll, | ||
kCompactRange | ||
kCompactRange, | ||
kCompactOldestOrBestDeleteRatioSst, | ||
Comment on lines
+168
to
+169
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Assign explicit integer values to the `Operation` enum. Apply this diff to assign explicit integer values: enum Operation {
- kNone = 0,
- kCleanAll,
- kCompactRange,
- kCompactOldestOrBestDeleteRatioSst,
+ kNone = 0,
+ kCleanAll = 1,
+ kCompactRange = 2,
+ kCompactOldestOrBestDeleteRatioSst = 3,
};
|
||
}; | ||
|
||
struct BGTask { | ||
|
@@ -1080,6 +1090,14 @@ class Storage { | |
Status DoCompactRange(const DataType& type, const std::string& start, const std::string& end); | ||
Status DoCompactSpecificKey(const DataType& type, const std::string& key); | ||
|
||
/** | ||
* LongestNotCompactiontSstCompact will execute the compact command for any cf in the given type | ||
* @param type. data type like `kStrings` | ||
* @param sync. if true, block function | ||
* @return Status | ||
*/ | ||
Status LongestNotCompactiontSstCompact(const DataType &type, bool sync = false); | ||
|
||
Status SetMaxCacheStatisticKeys(uint32_t max_cache_statistic_keys); | ||
Status SetSmallCompactionThreshold(uint32_t small_compaction_threshold); | ||
Status SetSmallCompactionDurationThreshold(uint32_t small_compaction_duration_threshold); | ||
|
@@ -1103,6 +1121,7 @@ class Storage { | |
const std::string& db_type, const std::unordered_map<std::string, std::string>& options); | ||
void GetRocksDBInfo(std::string& info); | ||
|
||
const StorageOptions& GetStorageOptions(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ensure thread safety when accessing The method |
||
// get hash cf handle in insts_[idx] | ||
std::vector<rocksdb::ColumnFamilyHandle*> GetHashCFHandles(const int idx); | ||
// get DefaultWriteOptions in insts_[idx] | ||
|
@@ -1115,6 +1134,7 @@ class Storage { | |
int db_instance_num_ = 3; | ||
int slot_num_ = 1024; | ||
bool is_classic_mode_ = true; | ||
StorageOptions storage_options_; | ||
|
||
std::unique_ptr<LRUCache<std::string, std::string>> cursors_store_; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the comment for the
force-compact-min-delete-ratio
parameter. The current comment for the
force-compact-min-delete-ratio
parameter is incorrect. It mentions triggering compaction based on the number of SST files, which is not related to the delete ratio. Please update the comment to accurately describe the purpose of the delete ratio threshold for forcing compaction. Consider updating the comment as follows:
Committable suggestion