From d9fceab4443a6695e82a79a8c59045e467bc3bd1 Mon Sep 17 00:00:00 2001
From: saz97
Date: Thu, 13 Jun 2024 19:28:31 +0800
Subject: [PATCH 1/9] pfmerge input bug

---
 src/pika_command.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pika_command.cc b/src/pika_command.cc
index 81c23c2533..da27b08d39 100644
--- a/src/pika_command.cc
+++ b/src/pika_command.cc
@@ -703,7 +703,7 @@ void InitCmdTable(CmdTable* cmd_table) {
   cmd_table->insert(std::pair<std::string, std::unique_ptr<Cmd>>(kCmdNamePfCount, std::move(pfcountptr)));
   ////pfmergeCmd
   std::unique_ptr<Cmd> pfmergeptr = std::make_unique<PfMergeCmd>(
-      kCmdNamePfMerge, -3, kCmdFlagsWrite | kCmdFlagsHyperLogLog | kCmdFlagsSlow);
+      kCmdNamePfMerge, -2, kCmdFlagsWrite | kCmdFlagsHyperLogLog | kCmdFlagsSlow);
   cmd_table->insert(std::pair<std::string, std::unique_ptr<Cmd>>(kCmdNamePfMerge, std::move(pfmergeptr)));

   // GEO

From e059d63402267398fd509f8a8d72b44b6124b550 Mon Sep 17 00:00:00 2001
From: saz97
Date: Mon, 17 Jun 2024 14:34:28 +0800
Subject: [PATCH 2/9] use one bit in reserve to add isolation between string and hyperloglog

---
 src/storage/src/redis.h                |   2 +
 src/storage/src/redis_hyperloglog.cc   |  64 +++++-
 src/storage/src/storage.cc             |  18 +-
 src/storage/src/strings_value_format.h |  29 ++-
 tests/assets/default.conf              |  27 ++-
 tests/unit/type/hyperloglog.tcl        | 262 +++++++++++++++++++++++++
 6 files changed, 386 insertions(+), 16 deletions(-)
 create mode 100644 tests/unit/type/hyperloglog.tcl

diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h
index 84f95b67e5..50639d9b0b 100644
--- a/src/storage/src/redis.h
+++ b/src/storage/src/redis.h
@@ -154,6 +154,7 @@ class Redis {
   Status BitOp(BitOpType op, const std::string& dest_key, const std::vector<std::string>& src_keys,
               std::string &value_to_dest, int64_t* ret);
   Status Decrby(const Slice& key, int64_t value, int64_t* ret);
   Status Get(const Slice& key, std::string* value);
+  Status HyperloglogGet(const Slice& key, std::string* value);
   Status MGet(const Slice& key, std::string* value);
   Status GetWithTTL(const Slice& key, std::string* value, int64_t* ttl);
   Status GetBit(const Slice& key, int64_t offset, int32_t* ret);
@@ -166,6 +167,7 @@ class Redis {
   Status MSet(const std::vector<KeyValue>& kvs);
   Status MSetnx(const std::vector<KeyValue>& kvs, int32_t* ret);
   Status Set(const Slice& key, const Slice& value);
+  Status HyperloglogSet(const Slice& key, const Slice& value);
   Status Setxx(const Slice& key, const Slice& value, int32_t* ret, int64_t ttl = 0);
   Status SetBit(const Slice& key, int64_t offset, int32_t value, int32_t* ret);
   Status Setex(const Slice& key, const Slice& value, int64_t ttl);

diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc
index 52dae42465..10e88db380 100644
--- a/src/storage/src/redis_hyperloglog.cc
+++ b/src/storage/src/redis_hyperloglog.cc
@@ -3,11 +3,18 @@
 // LICENSE file in the root directory of this source tree. An additional grant
 // of patent rights can be found in the PATENTS file in the same directory.
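[Editor's note on patch 1/9 above: the only change is the PFMERGE arity, -3 to -2. By Redis command-table convention a negative arity means "at least |N| arguments, counting the command name itself", so -2 lets `PFMERGE destkey` run with no source keys, which Redis accepts. A minimal sketch of that convention; the helper name and shape are illustrative assumptions, not Pika's actual check.]

#include <cstddef>
#include <string>
#include <vector>

// Illustration of how a negative arity is conventionally interpreted.
bool ArityMatches(const std::vector<std::string>& argv, int arity) {
  if (arity >= 0) {
    return argv.size() == static_cast<size_t>(arity);  // exact argument count
  }
  return argv.size() >= static_cast<size_t>(-arity);   // at least |arity| args
}

// With arity -3, {"pfmerge", "dest"} fails the check; with -2 it passes,
// matching Redis, where PFMERGE with only a destination key is valid.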
-#include "src/redis_hyperloglog.h" + #include #include #include +#include + #include "src/storage_murmur3.h" +#include "storage/storage_define.h" +#include "src/redis.h" +#include "src/mutex.h" +#include "src/redis_hyperloglog.h" +#include "src/scope_record_lock.h" namespace storage { @@ -111,4 +118,59 @@ std::string HyperLogLog::Merge(const HyperLogLog& hll) { // ::__builtin_ctz(x): 返回右起第一个‘1’之后的0的个数 uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std::min(b, ::__builtin_ctz(x))) + 1; } + +bool IsHyperloglogObj(std::string* internal_value_str){ + size_t offset = 0; + size_t kStringsValueSuffixLength = 2 * kTimestampLength + kSuffixReserveLength; + char reserve[16] = {0}; + offset += kTypeLength; + rocksdb::Slice user_value_; + offset += (rocksdb::Slice(internal_value_str->data() + offset, + internal_value_str->size() - kStringsValueSuffixLength - offset)).size(); + memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); + + bool res = (reserve[0] & 0x80) != 0;//if first bit in reserve is 0 , then this obj is string; else the obj is hll + return res; +} + +Status Redis::HyperloglogGet(const Slice& key, std::string* value) { + value->clear(); + + BaseKey base_key(key); + Status s = db_->Get(default_read_options_, base_key.Encode(), value); + std::string meta_value = *value; + if (s.ok()){ + if (!ExpectedMetaValue(DataType::kStrings, meta_value)){ + if (ExpectedStale(meta_value)) { + s = Status::NotFound(); + } else { + return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + ", expect type: " + "strings " + "get type: " + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } + } + else if (!IsHyperloglogObj(value)){ + return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + ", expect type: " + "hyperloglog " + "get type: " + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } + else { + ParsedStringsValue parsed_strings_value(value); + if (parsed_strings_value.IsStale()) { + value->clear(); + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + } + } + + } + + return s; +} + +Status Redis::HyperloglogSet(const Slice& key, const Slice& value) { + HyperloglogValue hyperloglog_value(value); + ScopeRecordLock l(lock_mgr_, key); + + BaseKey base_key(key); + return db_->Put(default_write_options_, base_key.Encode(), hyperloglog_value.Encode()); +} + } // namespace storage diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index eff2a82176..fc0b6138d4 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -1548,7 +1548,7 @@ Status Storage::PfAdd(const Slice& key, const std::vector& values, std::string registers; std::string result; auto& inst = GetDBInstance(key); - Status s = inst->Get(key, &value); + Status s = inst->HyperloglogGet(key, &value); if (s.ok()) { registers = value; } else if (s.IsNotFound()) { @@ -1566,7 +1566,7 @@ Status Storage::PfAdd(const Slice& key, const std::vector& values, if (previous != now || (s.IsNotFound() && values.empty())) { *update = true; } - s = inst->Set(key, result); + s = inst->HyperloglogSet(key, result); return s; } @@ -1578,19 +1578,21 @@ Status Storage::PfCount(const std::vector& keys, int64_t* result) { std::string value; std::string first_registers; auto& inst = GetDBInstance(keys[0]); - Status s = inst->Get(keys[0], &value); + Status s = inst->HyperloglogGet(keys[0], &value); if (s.ok()) { first_registers = std::string(value.data(), value.size()); } else if (s.IsNotFound()) { first_registers = ""; + 
} else { + return s; } - + LOG(INFO) << s.ToString() << std::endl; HyperLogLog first_log(kPrecision, first_registers); for (size_t i = 1; i < keys.size(); ++i) { std::string value; std::string registers; auto& inst = GetDBInstance(keys[i]); - s = inst->Get(keys[i], &value); + s = inst->HyperloglogGet(keys[i], &value); if (s.ok()) { registers = value; } else if (s.IsNotFound()) { @@ -1615,7 +1617,7 @@ Status Storage::PfMerge(const std::vector& keys, std::string& value std::string first_registers; std::string result; auto& inst = GetDBInstance(keys[0]); - s = inst->Get(keys[0], &value); + s = inst->HyperloglogGet(keys[0], &value); if (s.ok()) { first_registers = std::string(value.data(), value.size()); } else if (s.IsNotFound()) { @@ -1628,7 +1630,7 @@ Status Storage::PfMerge(const std::vector& keys, std::string& value std::string value; std::string registers; auto& tmp_inst = GetDBInstance(keys[i]); - s = tmp_inst->Get(keys[i], &value); + s = tmp_inst->HyperloglogGet(keys[i], &value); if (s.ok()) { registers = std::string(value.data(), value.size()); } else if (s.IsNotFound()) { @@ -1640,7 +1642,7 @@ Status Storage::PfMerge(const std::vector& keys, std::string& value result = first_log.Merge(log); } auto& ninst = GetDBInstance(keys[0]); - s = ninst->Set(keys[0], result); + s = ninst->HyperloglogSet(keys[0], result); value_to_dest = std::move(result); return s; } diff --git a/src/storage/src/strings_value_format.h b/src/storage/src/strings_value_format.h index 96b9d4d279..4a17af9cdb 100644 --- a/src/storage/src/strings_value_format.h +++ b/src/storage/src/strings_value_format.h @@ -23,13 +23,36 @@ class StringsValue : public InternalValue { size_t usize = user_value_.size(); size_t needed = usize + kSuffixReserveLength + 2 * kTimestampLength + kTypeLength; char* dst = ReAllocIfNeeded(needed); - memcpy(dst, &type_, sizeof(type_)); + memcpy(dst, &type_, sizeof(type_));//set type as Kstring dst += sizeof(type_); char* start_pos = dst; - memcpy(dst, user_value_.data(), usize); + memcpy(dst, user_value_.data(), usize);//copy real value dst += usize; - memcpy(dst, reserve_, kSuffixReserveLength); + memcpy(dst, reserve_, kSuffixReserveLength);//copy reserve + dst += kSuffixReserveLength; + EncodeFixed64(dst, ctime_); + dst += kTimestampLength; + EncodeFixed64(dst, etime_); + return {start_, needed}; + } +}; + +class HyperloglogValue : public InternalValue { + public: + explicit HyperloglogValue(const rocksdb::Slice& user_value) : InternalValue(DataType::kStrings, user_value) {} + virtual rocksdb::Slice Encode() override { + size_t usize = user_value_.size(); + size_t needed = usize + kSuffixReserveLength + 2 * kTimestampLength + kTypeLength; + char* dst = ReAllocIfNeeded(needed); + memcpy(dst, &type_, sizeof(type_));//set type as Kstring + dst += sizeof(type_); + char* start_pos = dst; + + memcpy(dst, user_value_.data(), usize);//copy real value + dst += usize; + reserve_[0] = 0x80; + memcpy(dst, reserve_, kSuffixReserveLength);//copy reserve dst += kSuffixReserveLength; EncodeFixed64(dst, ctime_); dst += kTimestampLength; diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 468d253e89..1a7b815885 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -34,10 +34,17 @@ slow-cmd-thread-pool-size : 1 # Slow cmd list e.g. hgetall, mset slow-cmd-list : -# The number of sync-thread for data replication from master, those are the threads work on slave nodes -# and are used to execute commands sent from master node when replicating. 
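[Editor's note: the HyperloglogValue added in the strings_value_format.h hunk earlier in this patch writes the same | type 1B | user value | reserve 16B | ctime 8B | etime 8B | layout as StringsValue, differing only in setting the high bit of reserve[0]. A self-contained sketch of that framing, assuming little-endian fixed-64 encoding; the function name and simplified types are stand-ins for Pika's InternalValue/EncodeFixed64.]

#include <cstdint>
#include <cstring>
#include <string>

// Simplified sketch of the on-disk string-value framing.
std::string EncodeStringValue(uint8_t type, const std::string& user_value,
                              bool is_hyperloglog, uint64_t ctime, uint64_t etime) {
  std::string out;
  out.push_back(static_cast<char>(type));   // 1B type tag (DataType::kStrings)
  out.append(user_value);                   // raw user payload
  char reserve[16] = {0};
  if (is_hyperloglog) {
    reserve[0] |= 0x80;                     // the isolation bit added by this patch
  }
  out.append(reserve, sizeof(reserve));     // 16B reserve suffix
  char ts[8];
  std::memcpy(ts, &ctime, sizeof(ts));      // real code uses EncodeFixed64
  out.append(ts, sizeof(ts));
  std::memcpy(ts, &etime, sizeof(ts));
  out.append(ts, sizeof(ts));
  return out;
}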
+# The number of threads to write DB in slaveNode when replicating. +# It's preferable to set slave's sync-thread-num value close to master's thread-pool-size. sync-thread-num : 6 +# The num of threads to write binlog in slaveNode when replicating, +# each DB cloud only bind to one sync-binlog-thread to write binlog in maximum +#[NOTICE] It's highly recommended to set sync-binlog-thread-num equal to conf item 'database'(then each DB cloud have a exclusive thread to write binlog), +# eg. if you use 8 DBs(databases_ is 8), sync-binlog-thread-num is preferable to be 8 +# Valid range of sync-binlog-thread-num is [1, databases], the final value of it is Min(sync-binlog-thread-num, databases) +sync-binlog-thread-num : 1 + # Directory to store log files of Pika, which contains multiple types of logs, # Including: INFO, WARNING, ERROR log, as well as binglog(write2fine) file which # is used for replication. @@ -101,6 +108,8 @@ instance-mode : classic # The default database id is DB 0. You can select a different one on # a per-connection by using SELECT. The db id range is [0, 'databases' value -1]. # The value range of this parameter is [1, 8]. +# [NOTICE] It's RECOMMENDED to set sync-binlog-thread-num equal to DB num(databases), +# if you've changed the value of databases, remember to check if the value of sync-binlog-thread-num is proper. databases : 1 # The number of followers of a master. Only [0, 1, 2, 3, 4] is valid at present. @@ -308,6 +317,11 @@ max-write-buffer-num : 2 # whether the key exists. Setting this value too high may hurt performance. min-write-buffer-number-to-merge : 1 +# The total size of wal files, when reaches this limit, rocksdb will force the flush of column-families +# whose memtables are backed by the oldest live WAL file. Also used to control the rocksdb open time when +# process restart. +max-total-wal-size : 1073741824 + # rocksdb level0_stop_writes_trigger level0-stop-writes-trigger : 36 @@ -466,9 +480,14 @@ default-slot-num : 1024 # The cache will be sharded into 2^blob-num-shard-bits shards. # blob-num-shard-bits : -1 -# Rsync Rate limiting configuration 200MB/s +# Rsync Rate limiting configuration [Default value is 200MB/s] +# [USED BY SLAVE] The transmitting speed(Rsync Rate) In full replication is controlled BY SLAVE NODE, You should modify the throttle-bytes-per-second in slave's pika.conf if you wanna change the rsync rate limit. +# [Dynamic Change Supported] send command 'config set throttle-bytes-per-second new_value' to SLAVE NODE can dynamically adjust rsync rate during full sync(use config rewrite can persist the changes). throttle-bytes-per-second : 207200000 - +# Rsync timeout in full sync stage[Default value is 1000 ms], unnecessary retries will happen if this value is too small. +# [Dynamic Change Supported] similar to throttle-bytes-per-second, rsync-timeout-ms can be dynamically changed by configset command +# [USED BY SLAVE] Similar to throttle-bytes-per-second, you should change rsync-timeout-ms's value in slave's conf file if it is needed to adjust. +rsync-timeout-ms : 1000 # The valid range for max-rsync-parallel-num is [1, 4]. # If an invalid value is provided, max-rsync-parallel-num will automatically be reset to 4. 
max-rsync-parallel-num : 4 diff --git a/tests/unit/type/hyperloglog.tcl b/tests/unit/type/hyperloglog.tcl new file mode 100644 index 0000000000..1f719cc4d6 --- /dev/null +++ b/tests/unit/type/hyperloglog.tcl @@ -0,0 +1,262 @@ +start_server {tags {"hll"}} { +# Pika does not support the pfdebug command +# test {HyperLogLog self test passes} { +# catch {r pfselftest} e +# set e +# } {OK} + + test {PFADD without arguments creates an HLL value} { + r pfadd hll + r exists hll + } {1} + + test {Approximated cardinality after creation is zero} { + r pfcount hll + } {0} + + test {PFADD returns 1 when at least 1 reg was modified} { + r pfadd hll a b c + } {1} + + test {PFADD returns 0 when no reg was modified} { + r pfadd hll a b c + } {0} + + test {PFADD works with empty string (regression)} { + r pfadd hll "" + } + + # Note that the self test stresses much better the + # cardinality estimation error. We are testing just the + # command implementation itself here. + test {PFCOUNT returns approximated cardinality of set} { + r del hll + set res {} + r pfadd hll 1 2 3 4 5 + lappend res [r pfcount hll] + # Call it again to test cached value invalidation. + r pfadd hll 6 7 8 8 9 10 + lappend res [r pfcount hll] + set res + } {5 10} + +# This parameter is not available in Pika +# test {HyperLogLogs are promote from sparse to dense} { +# r del hll +# r config set hll-sparse-max-bytes 3000 +# set n 0 +# while {$n < 100} { +# set elements {} +# for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]} +# incr n 100 +# r pfadd hll {*}$elements +# set card [r pfcount hll] +# set err [expr {abs($card-$n)}] +# assert {$err < (double($card)/100)*5} +# if {$n < 1000} { +# assert {[r pfdebug encoding hll] eq {sparse}} +# } elseif {$n > 10000} { +# assert {[r pfdebug encoding hll] eq {dense}} +# } +# } +# } + +# Pika does not support the pfdebug command +# test {HyperLogLog sparse encoding stress test} { +# for {set x 0} {$x < 1000} {incr x} { +# r del hll1 hll2 +# set numele [randomInt 100] +# set elements {} +# for {set j 0} {$j < $numele} {incr j} { +# lappend elements [expr rand()] +# } + # Force dense representation of hll2 +# r pfadd hll2 +# r pfdebug todense hll2 +# r pfadd hll1 {*}$elements +# r pfadd hll2 {*}$elements +# assert {[r pfdebug encoding hll1] eq {sparse}} +# assert {[r pfdebug encoding hll2] eq {dense}} +# # Cardinality estimated should match exactly. 
+# assert {[r pfcount hll1] eq [r pfcount hll2]} +# } +# } + +# The return value of Pika is inconsistent with Redis + test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} { + r del hll + r pfadd hll a b c + r append hll "hello" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + +# The return value of Pika is inconsistent with Redis + test {Corrupted sparse HyperLogLogs are detected: Broken magic} { + r del hll + r pfadd hll a b c + r setrange hll 0 "0123" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + +# The return value of Pika is inconsistent with Redis + test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} { + r del hll + r pfadd hll a b c + r setrange hll 4 "x" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + +# The return value of Pika is inconsistent with Redis + test {Corrupted dense HyperLogLogs are detected: Wrong length} { + r del hll + r pfadd hll a b c + r setrange hll 4 "\x00" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + +# The return value of Pika is inconsistent with Redis + test {PFADD, PFCOUNT, PFMERGE type checking works} { + r set foo bar + catch {r pfadd foo 1} e + assert_match {*WRONGTYPE*} $e + catch {r pfcount foo} e + assert_match {*WRONGTYPE*} $e + catch {r pfmerge bar foo} e + assert_match {*WRONGTYPE*} $e + # catch {r pfmerge foo bar} e + # assert_match {*WRONGTYPE*} $e + } + + test {PFMERGE results on the cardinality of union of sets} { + r del hll hll1 hll2 hll3 + r pfadd hll1 a b c + r pfadd hll2 b c d + r pfadd hll3 c d e + r pfmerge hll hll1 hll2 hll3 + r pfcount hll + } {5} + +# The return value of Pika is inconsistent with Redis + test {PFCOUNT multiple-keys merge returns cardinality of union} { + r del hll1 hll2 hll3 + for {set x 1} {$x < 100} {incr x} { + # Force dense representation of hll2 + r pfadd hll1 "foo-$x" + r pfadd hll2 "bar-$x" + r pfadd hll3 "zap-$x" + + set card [r pfcount hll1 hll2 hll3] + set realcard [expr {$x*3}] + set err [expr {abs($card-$realcard)}] + assert {$err < (double($card)/100)*5} + } + } + +# The return value of Pika is inconsistent with Redis +# test {HYPERLOGLOG press test: 5w, 10w, 15w, 20w, 30w, 50w, 100w} { +# r del hll1 +# for {set x 1} {$x <= 1000000} {incr x} { +# r pfadd hll1 "foo-$x" +# if {$x == 50000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage [expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.01} +# } +# if {$x == 100000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage [expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.01} +# } +# if {$x == 150000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage [expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.01} +# } +# if {$x == 300000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage 
[expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.01} +# } +# if {$x == 500000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage [expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.01} +# } +# if {$x == 1000000} { +# set card [r pfcount hll1] +# set realcard [expr {$x*1}] +# set err [expr {abs($card-$realcard)}] +# +# set d_err [expr {$err * 1.0}] +# set d_realcard [expr {$realcard * 1.0}] +# set err_precentage [expr {double($d_err / $d_realcard)}] +# puts "$x error rate: $err_precentage" +# assert {$err < $realcard * 0.03} +# } +# } +# } + +# Pika does not support the pfdebug command +# test {PFDEBUG GETREG returns the HyperLogLog raw registers} { +# r del hll +# r pfadd hll 1 2 3 +# llength [r pfdebug getreg hll] +# } {16384} + +# Pika does not support the pfdebug command +# test {PFDEBUG GETREG returns the HyperLogLog raw registers} { +# r del hll +# r pfadd hll 1 2 3 +# llength [r pfdebug getreg hll] +# } {16384} + +# The return value of Pika is inconsistent with Redis + test {PFADD / PFCOUNT cache invalidation works} { + r del hll + r pfadd hll a b c + r pfcount hll + assert {[r getrange hll 15 15] eq "\x00"} + r pfadd hll a b c + assert {[r getrange hll 15 15] eq "\x00"} + # r pfadd hll 1 2 3 + # assert {[r getrange hll 15 15] eq "\x80"} + } +} From 15b3000284d185639dd03479b934f782c3b4f665 Mon Sep 17 00:00:00 2001 From: saz97 Date: Mon, 17 Jun 2024 14:37:56 +0800 Subject: [PATCH 3/9] delete output log --- src/storage/src/storage.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index fc0b6138d4..450f06636b 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -1586,7 +1586,6 @@ Status Storage::PfCount(const std::vector& keys, int64_t* result) { } else { return s; } - LOG(INFO) << s.ToString() << std::endl; HyperLogLog first_log(kPrecision, first_registers); for (size_t i = 1; i < keys.size(); ++i) { std::string value; From 597207513ca33e550b8bb02096f0119d2bc684e8 Mon Sep 17 00:00:00 2001 From: saz97 Date: Tue, 18 Jun 2024 11:18:22 +0800 Subject: [PATCH 4/9] modify code format --- src/storage/src/redis_hyperloglog.cc | 79 +++++++++++++------------- src/storage/src/strings_value_format.h | 12 ++-- 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc index 10e88db380..b378e31ce5 100644 --- a/src/storage/src/redis_hyperloglog.cc +++ b/src/storage/src/redis_hyperloglog.cc @@ -119,8 +119,8 @@ std::string HyperLogLog::Merge(const HyperLogLog& hll) { uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std::min(b, ::__builtin_ctz(x))) + 1; } -bool IsHyperloglogObj(std::string* internal_value_str){ - size_t offset = 0; +bool IsHyperloglogObj(std::string *internal_value_str) { + size_t offset = 0; size_t kStringsValueSuffixLength = 2 * kTimestampLength + kSuffixReserveLength; char reserve[16] = {0}; offset += kTypeLength; @@ -128,49 +128,52 @@ bool IsHyperloglogObj(std::string* internal_value_str){ offset += (rocksdb::Slice(internal_value_str->data() + offset, internal_value_str->size() - kStringsValueSuffixLength - offset)).size(); memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); - - bool res = (reserve[0] & 0x80) != 
0;//if first bit in reserve is 0 , then this obj is string; else the obj is hll + + //if first bit in reserve is 0 , then this obj is string; else the obj is hll + bool res = (reserve[0] & 0x80) != 0; return res; } -Status Redis::HyperloglogGet(const Slice& key, std::string* value) { - value->clear(); - - BaseKey base_key(key); - Status s = db_->Get(default_read_options_, base_key.Encode(), value); - std::string meta_value = *value; - if (s.ok()){ - if (!ExpectedMetaValue(DataType::kStrings, meta_value)){ - if (ExpectedStale(meta_value)) { - s = Status::NotFound(); - } else { - return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + ", expect type: " + "strings " + "get type: " + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); - } - } - else if (!IsHyperloglogObj(value)){ - return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + ", expect type: " + "hyperloglog " + "get type: " + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); - } - else { - ParsedStringsValue parsed_strings_value(value); - if (parsed_strings_value.IsStale()) { - value->clear(); - return Status::NotFound("Stale"); - } else { - parsed_strings_value.StripSuffix(); - } - } +Status Redis::HyperloglogGet(const Slice &key, std::string *value) { + value->clear(); + + BaseKey base_key(key); + Status s = db_->Get(default_read_options_, base_key.Encode(), value); + std::string meta_value = *value; + if (s.ok()) { + if (!ExpectedMetaValue(DataType::kStrings, meta_value)) { + if (ExpectedStale(meta_value)) { + s = Status::NotFound(); + } else { + return Status::InvalidArgument( + "WRONGTYPE, key: " + key.ToString() + ", expect type: " + "strings " + "get type: " + + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } + } else if (!IsHyperloglogObj(value)) { + return Status::InvalidArgument( + "WRONGTYPE, key: " + key.ToString() + ", expect type: " + "hyperloglog " + "get type: " + + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } else { + ParsedStringsValue parsed_strings_value(value); + if (parsed_strings_value.IsStale()) { + value->clear(); + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + } + } - } + } - return s; + return s; } -Status Redis::HyperloglogSet(const Slice& key, const Slice& value) { - HyperloglogValue hyperloglog_value(value); - ScopeRecordLock l(lock_mgr_, key); +Status Redis::HyperloglogSet(const Slice &key, const Slice &value) { + HyperloglogValue hyperloglog_value(value); + ScopeRecordLock l(lock_mgr_, key); - BaseKey base_key(key); - return db_->Put(default_write_options_, base_key.Encode(), hyperloglog_value.Encode()); + BaseKey base_key(key); + return db_->Put(default_write_options_, base_key.Encode(), hyperloglog_value.Encode()); } -} // namespace storage +} // namespace storage \ No newline at end of file diff --git a/src/storage/src/strings_value_format.h b/src/storage/src/strings_value_format.h index 4a17af9cdb..a6c1bd6dd5 100644 --- a/src/storage/src/strings_value_format.h +++ b/src/storage/src/strings_value_format.h @@ -23,13 +23,13 @@ class StringsValue : public InternalValue { size_t usize = user_value_.size(); size_t needed = usize + kSuffixReserveLength + 2 * kTimestampLength + kTypeLength; char* dst = ReAllocIfNeeded(needed); - memcpy(dst, &type_, sizeof(type_));//set type as Kstring + memcpy(dst, &type_, sizeof(type_)); dst += sizeof(type_); char* start_pos = dst; - memcpy(dst, user_value_.data(), usize);//copy real value + memcpy(dst, user_value_.data(), usize); dst += usize; - 
memcpy(dst, reserve_, kSuffixReserveLength);//copy reserve + memcpy(dst, reserve_, kSuffixReserveLength); dst += kSuffixReserveLength; EncodeFixed64(dst, ctime_); dst += kTimestampLength; @@ -45,14 +45,14 @@ class HyperloglogValue : public InternalValue { size_t usize = user_value_.size(); size_t needed = usize + kSuffixReserveLength + 2 * kTimestampLength + kTypeLength; char* dst = ReAllocIfNeeded(needed); - memcpy(dst, &type_, sizeof(type_));//set type as Kstring + memcpy(dst, &type_, sizeof(type_)); dst += sizeof(type_); char* start_pos = dst; - memcpy(dst, user_value_.data(), usize);//copy real value + memcpy(dst, user_value_.data(), usize); dst += usize; reserve_[0] = 0x80; - memcpy(dst, reserve_, kSuffixReserveLength);//copy reserve + memcpy(dst, reserve_, kSuffixReserveLength); dst += kSuffixReserveLength; EncodeFixed64(dst, ctime_); dst += kTimestampLength; From 592f3dbdb93709a006c816f79f6f328882c8614b Mon Sep 17 00:00:00 2001 From: saz97 Date: Tue, 18 Jun 2024 14:14:43 +0800 Subject: [PATCH 5/9] revise HyperloglogGet based on committed advise --- src/storage/src/redis_hyperloglog.cc | 46 +++++++++++++--------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc index b378e31ce5..b7b5cdaa97 100644 --- a/src/storage/src/redis_hyperloglog.cc +++ b/src/storage/src/redis_hyperloglog.cc @@ -130,8 +130,7 @@ bool IsHyperloglogObj(std::string *internal_value_str) { memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); //if first bit in reserve is 0 , then this obj is string; else the obj is hll - bool res = (reserve[0] & 0x80) != 0; - return res; + return (reserve[0] & 0x80) != 0;; } Status Redis::HyperloglogGet(const Slice &key, std::string *value) { @@ -140,31 +139,30 @@ Status Redis::HyperloglogGet(const Slice &key, std::string *value) { BaseKey base_key(key); Status s = db_->Get(default_read_options_, base_key.Encode(), value); std::string meta_value = *value; - if (s.ok()) { - if (!ExpectedMetaValue(DataType::kStrings, meta_value)) { - if (ExpectedStale(meta_value)) { - s = Status::NotFound(); - } else { - return Status::InvalidArgument( - "WRONGTYPE, key: " + key.ToString() + ", expect type: " + "strings " + "get type: " + - DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); - } - } else if (!IsHyperloglogObj(value)) { - return Status::InvalidArgument( - "WRONGTYPE, key: " + key.ToString() + ", expect type: " + "hyperloglog " + "get type: " + - DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + if (!s.ok()) { + return s; + } + if (!ExpectedMetaValue(DataType::kStrings, meta_value)) { + if (ExpectedStale(meta_value)) { + s = Status::NotFound(); } else { - ParsedStringsValue parsed_strings_value(value); - if (parsed_strings_value.IsStale()) { - value->clear(); - return Status::NotFound("Stale"); - } else { - parsed_strings_value.StripSuffix(); - } + return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + + ", expect type: " + "hyperloglog " + "get type: " + + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } + } else if (!IsHyperloglogObj(value)) { + return Status::InvalidArgument("WRONGTYPE, key: " + key.ToString() + + ",expect type: " + "hyperloglog " + "get type: " + + DataTypeStrings[static_cast(GetMetaValueType(meta_value))]); + } else { + ParsedStringsValue parsed_strings_value(value); + if (parsed_strings_value.IsStale()) { + value->clear(); + return Status::NotFound("Stale"); + } else { + 
parsed_strings_value.StripSuffix(); } - } - return s; } From ff20a6ba4d58d324435c6ba3f72c45e053bc4006 Mon Sep 17 00:00:00 2001 From: saz97 Date: Wed, 19 Jun 2024 20:01:25 +0800 Subject: [PATCH 6/9] revise HyperloglogGet based on committed advise --- src/storage/src/redis_hyperloglog.cc | 7 +++---- src/storage/src/strings_value_format.h | 6 +++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc index b7b5cdaa97..c1416f7a64 100644 --- a/src/storage/src/redis_hyperloglog.cc +++ b/src/storage/src/redis_hyperloglog.cc @@ -119,21 +119,20 @@ std::string HyperLogLog::Merge(const HyperLogLog& hll) { uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std::min(b, ::__builtin_ctz(x))) + 1; } -bool IsHyperloglogObj(std::string *internal_value_str) { +bool IsHyperloglogObj(const std::string* internal_value_str) { size_t offset = 0; size_t kStringsValueSuffixLength = 2 * kTimestampLength + kSuffixReserveLength; char reserve[16] = {0}; offset += kTypeLength; - rocksdb::Slice user_value_; offset += (rocksdb::Slice(internal_value_str->data() + offset, internal_value_str->size() - kStringsValueSuffixLength - offset)).size(); memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); //if first bit in reserve is 0 , then this obj is string; else the obj is hll - return (reserve[0] & 0x80) != 0;; + return (reserve[0] & hyperloglog_reserve_flag) != 0;; } -Status Redis::HyperloglogGet(const Slice &key, std::string *value) { +Status Redis::HyperloglogGet(const Slice &key, std::string* value) { value->clear(); BaseKey base_key(key); diff --git a/src/storage/src/strings_value_format.h b/src/storage/src/strings_value_format.h index a6c1bd6dd5..9a0491c29b 100644 --- a/src/storage/src/strings_value_format.h +++ b/src/storage/src/strings_value_format.h @@ -11,11 +11,15 @@ #include "src/base_value_format.h" #include "storage/storage_define.h" + namespace storage { /* * | type | value | reserve | cdate | timestamp | * | 1B | | 16B | 8B | 8B | +* The first bit in reservse field is used to isolate string and hyperloglog */ + +#define hyperloglog_reserve_flag 0x80 class StringsValue : public InternalValue { public: explicit StringsValue(const rocksdb::Slice& user_value) : InternalValue(DataType::kStrings, user_value) {} @@ -51,7 +55,7 @@ class HyperloglogValue : public InternalValue { memcpy(dst, user_value_.data(), usize); dst += usize; - reserve_[0] = 0x80; + reserve_[0] |= hyperloglog_reserve_flag; memcpy(dst, reserve_, kSuffixReserveLength); dst += kSuffixReserveLength; EncodeFixed64(dst, ctime_); From 330e4acc3e48708b381b61115e97a24b308f0dd8 Mon Sep 17 00:00:00 2001 From: saz97 Date: Fri, 21 Jun 2024 17:30:14 +0800 Subject: [PATCH 7/9] modified code based on review --- src/storage/src/redis_hyperloglog.cc | 4 ++-- src/storage/src/strings_value_format.h | 4 ++-- tests/assets/default.conf | 29 +++++--------------------- 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc index c1416f7a64..cf98d123c8 100644 --- a/src/storage/src/redis_hyperloglog.cc +++ b/src/storage/src/redis_hyperloglog.cc @@ -115,7 +115,7 @@ std::string HyperLogLog::Merge(const HyperLogLog& hll) { return result; } -// ::__builtin_ctz(x): 返回右起第一个‘1’之后的0的个数 +// ::__builtin_ctz(x): return the first number of '0' after the first '1' from the right uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std::min(b, ::__builtin_ctz(x))) + 1; } 
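[Editor's note: the hunk that follows revises IsHyperloglogObj's comment, and patch 8/9 later collapses its offset arithmetic. Because the reserve/timestamp suffix has a fixed width, the reserve byte sits a fixed distance from the end of the encoded value, so one subtraction suffices. A standalone sketch of the check, with constants mirroring Pika's (8-byte timestamps, 16-byte reserve); the guard clause is an added assumption, the real code presumes well-formed values.]

#include <cstddef>
#include <cstdint>
#include <string>

constexpr size_t kTimestampLength = 8;       // mirrors Pika's constant
constexpr size_t kSuffixReserveLength = 16;  // mirrors Pika's constant
constexpr uint8_t kHllFlag = 0x80;           // high bit of reserve[0]

// Sketch of the string/hyperloglog type test over an encoded value.
bool IsHyperloglogValue(const std::string& internal_value) {
  const size_t suffix = 2 * kTimestampLength + kSuffixReserveLength;
  if (internal_value.size() < suffix) {
    return false;  // defensive guard, not in the original
  }
  const unsigned char reserve0 = internal_value[internal_value.size() - suffix];
  return (reserve0 & kHllFlag) != 0;  // bit set => hyperloglog, clear => string
}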
@@ -128,7 +128,7 @@ bool IsHyperloglogObj(const std::string* internal_value_str) { internal_value_str->size() - kStringsValueSuffixLength - offset)).size(); memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); - //if first bit in reserve is 0 , then this obj is string; else the obj is hll + //if first bit in reserve is 0 , then this obj is string; else the obj is hyperloglog return (reserve[0] & hyperloglog_reserve_flag) != 0;; } diff --git a/src/storage/src/strings_value_format.h b/src/storage/src/strings_value_format.h index 9a0491c29b..6e001d7475 100644 --- a/src/storage/src/strings_value_format.h +++ b/src/storage/src/strings_value_format.h @@ -18,8 +18,8 @@ namespace storage { * | 1B | | 16B | 8B | 8B | * The first bit in reservse field is used to isolate string and hyperloglog */ - -#define hyperloglog_reserve_flag 0x80 + // 80H = 1000000B +constexpr uint8_t hyperloglog_reserve_flag = 0x80; class StringsValue : public InternalValue { public: explicit StringsValue(const rocksdb::Slice& user_value) : InternalValue(DataType::kStrings, user_value) {} diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 1a7b815885..d5d1318f5c 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -34,17 +34,10 @@ slow-cmd-thread-pool-size : 1 # Slow cmd list e.g. hgetall, mset slow-cmd-list : -# The number of threads to write DB in slaveNode when replicating. -# It's preferable to set slave's sync-thread-num value close to master's thread-pool-size. +# The number of sync-thread for data replication from master, those are the threads work on slave nodes +# and are used to execute commands sent from master node when replicating. sync-thread-num : 6 -# The num of threads to write binlog in slaveNode when replicating, -# each DB cloud only bind to one sync-binlog-thread to write binlog in maximum -#[NOTICE] It's highly recommended to set sync-binlog-thread-num equal to conf item 'database'(then each DB cloud have a exclusive thread to write binlog), -# eg. if you use 8 DBs(databases_ is 8), sync-binlog-thread-num is preferable to be 8 -# Valid range of sync-binlog-thread-num is [1, databases], the final value of it is Min(sync-binlog-thread-num, databases) -sync-binlog-thread-num : 1 - # Directory to store log files of Pika, which contains multiple types of logs, # Including: INFO, WARNING, ERROR log, as well as binglog(write2fine) file which # is used for replication. @@ -108,8 +101,6 @@ instance-mode : classic # The default database id is DB 0. You can select a different one on # a per-connection by using SELECT. The db id range is [0, 'databases' value -1]. # The value range of this parameter is [1, 8]. -# [NOTICE] It's RECOMMENDED to set sync-binlog-thread-num equal to DB num(databases), -# if you've changed the value of databases, remember to check if the value of sync-binlog-thread-num is proper. databases : 1 # The number of followers of a master. Only [0, 1, 2, 3, 4] is valid at present. @@ -317,11 +308,6 @@ max-write-buffer-num : 2 # whether the key exists. Setting this value too high may hurt performance. min-write-buffer-number-to-merge : 1 -# The total size of wal files, when reaches this limit, rocksdb will force the flush of column-families -# whose memtables are backed by the oldest live WAL file. Also used to control the rocksdb open time when -# process restart. 
-max-total-wal-size : 1073741824 - # rocksdb level0_stop_writes_trigger level0-stop-writes-trigger : 36 @@ -480,14 +466,9 @@ default-slot-num : 1024 # The cache will be sharded into 2^blob-num-shard-bits shards. # blob-num-shard-bits : -1 -# Rsync Rate limiting configuration [Default value is 200MB/s] -# [USED BY SLAVE] The transmitting speed(Rsync Rate) In full replication is controlled BY SLAVE NODE, You should modify the throttle-bytes-per-second in slave's pika.conf if you wanna change the rsync rate limit. -# [Dynamic Change Supported] send command 'config set throttle-bytes-per-second new_value' to SLAVE NODE can dynamically adjust rsync rate during full sync(use config rewrite can persist the changes). +# Rsync Rate limiting configuration 200MB/s throttle-bytes-per-second : 207200000 -# Rsync timeout in full sync stage[Default value is 1000 ms], unnecessary retries will happen if this value is too small. -# [Dynamic Change Supported] similar to throttle-bytes-per-second, rsync-timeout-ms can be dynamically changed by configset command -# [USED BY SLAVE] Similar to throttle-bytes-per-second, you should change rsync-timeout-ms's value in slave's conf file if it is needed to adjust. -rsync-timeout-ms : 1000 + # The valid range for max-rsync-parallel-num is [1, 4]. # If an invalid value is provided, max-rsync-parallel-num will automatically be reset to 4. max-rsync-parallel-num : 4 @@ -586,4 +567,4 @@ cache-lfu-decay-time: 1 # Warning: Ensure that the Settings of rename-command on the master and slave servers are consistent # # Example: -# rename-command : FLUSHDB 360flushdb +# rename-command : FLUSHDB 360flushdb \ No newline at end of file From a369e2ac53ba5134924798b5e7f4bf1c81ff29e0 Mon Sep 17 00:00:00 2001 From: saz97 Date: Mon, 24 Jun 2024 11:29:33 +0800 Subject: [PATCH 8/9] modify code based on review --- src/storage/src/redis_hyperloglog.cc | 5 +- tests/assets/default.conf | 85 ++++++++++++++++++++++------ 2 files changed, 70 insertions(+), 20 deletions(-) diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc index cf98d123c8..c9cd1dd4c1 100644 --- a/src/storage/src/redis_hyperloglog.cc +++ b/src/storage/src/redis_hyperloglog.cc @@ -120,12 +120,9 @@ uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std:: bool IsHyperloglogObj(const std::string* internal_value_str) { - size_t offset = 0; size_t kStringsValueSuffixLength = 2 * kTimestampLength + kSuffixReserveLength; char reserve[16] = {0}; - offset += kTypeLength; - offset += (rocksdb::Slice(internal_value_str->data() + offset, - internal_value_str->size() - kStringsValueSuffixLength - offset)).size(); + size_t offset = internal_value_str->size() - kStringsValueSuffixLength; memcpy(reserve, internal_value_str->data() + offset, kSuffixReserveLength); //if first bit in reserve is 0 , then this obj is string; else the obj is hyperloglog diff --git a/tests/assets/default.conf b/tests/assets/default.conf index d5d1318f5c..3fcb5d5158 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -27,6 +27,11 @@ thread-num : 1 # are dedicated to handling user requests. thread-pool-size : 12 +# This parameter is used to control whether to separate fast and slow commands. +# When slow-cmd-pool is set to yes, fast and slow commands are separated. +# When set to no, they are not separated. +slow-cmd-pool : no + # Size of the low level thread pool, The threads within this pool # are dedicated to handling slow user requests. 
slow-cmd-thread-pool-size : 1 @@ -34,10 +39,17 @@ slow-cmd-thread-pool-size : 1 # Slow cmd list e.g. hgetall, mset slow-cmd-list : -# The number of sync-thread for data replication from master, those are the threads work on slave nodes -# and are used to execute commands sent from master node when replicating. +# The number of threads to write DB in slaveNode when replicating. +# It's preferable to set slave's sync-thread-num value close to master's thread-pool-size. sync-thread-num : 6 +# The num of threads to write binlog in slaveNode when replicating, +# each DB cloud only bind to one sync-binlog-thread to write binlog in maximum +#[NOTICE] It's highly recommended to set sync-binlog-thread-num equal to conf item 'database'(then each DB cloud have a exclusive thread to write binlog), +# eg. if you use 8 DBs(databases_ is 8), sync-binlog-thread-num is preferable to be 8 +# Valid range of sync-binlog-thread-num is [1, databases], the final value of it is Min(sync-binlog-thread-num, databases) +sync-binlog-thread-num : 1 + # Directory to store log files of Pika, which contains multiple types of logs, # Including: INFO, WARNING, ERROR log, as well as binglog(write2fine) file which # is used for replication. @@ -101,6 +113,8 @@ instance-mode : classic # The default database id is DB 0. You can select a different one on # a per-connection by using SELECT. The db id range is [0, 'databases' value -1]. # The value range of this parameter is [1, 8]. +# [NOTICE] It's RECOMMENDED to set sync-binlog-thread-num equal to DB num(databases), +# if you've changed the value of databases, remember to check if the value of sync-binlog-thread-num is proper. databases : 1 # The number of followers of a master. Only [0, 1, 2, 3, 4] is valid at present. @@ -231,7 +245,8 @@ slave-priority : 100 # The disable_auto_compactions option is [true | false] disable_auto_compactions : false -# Rocksdb max_subcompactions +# Rocksdb max_subcompactions, increasing this value can accelerate the exec speed of a single compaction task +# it's recommended to increase it's value if large compaction is found in you instance max-subcompactions : 1 # The minimum disk usage ratio for checking resume. # If the disk usage ratio is lower than min-check-resume-ratio, it will not check resume, only higher will check resume. @@ -308,6 +323,11 @@ max-write-buffer-num : 2 # whether the key exists. Setting this value too high may hurt performance. min-write-buffer-number-to-merge : 1 +# The total size of wal files, when reaches this limit, rocksdb will force the flush of column-families +# whose memtables are backed by the oldest live WAL file. Also used to control the rocksdb open time when +# process restart. +max-total-wal-size : 1073741824 + # rocksdb level0_stop_writes_trigger level0-stop-writes-trigger : 36 @@ -338,17 +358,42 @@ compression : snappy # https://github.com/facebook/rocksdb/wiki/Compression #compression_per_level : [none:none:snappy:lz4:lz4] +# The number of rocksdb background threads(sum of max-background-compactions and max-background-flushes) +# If max-background-jobs has a valid value AND both 'max-background-flushs' and 'max-background-compactions' is set to -1, +# then max-background-flushs' and 'max-background-compactions will be auto config by rocksdb, specifically: +# 1/4 of max-background-jobs will be given to max-background-flushs' and the rest(3/4) will be given to 'max-background-compactions'. +# 'max-background-jobs' default value is 3 and the value range is [2, 12]. 
+max-background-jobs : 3 + # The number of background flushing threads. -# max-background-flushes default value is 1 and the value range is [1, 4]. -max-background-flushes : 1 +# max-background-flushes default value is -1 and the value range is [1, 4] or -1. +# if 'max-background-flushes' is set to -1, the 'max-background-compactions' should also be set to -1, +# which means let rocksdb to auto config them based on the value of 'max-background-jobs' +max-background-flushes : -1 + +# [NOTICE] you MUST NOT set one of the max-background-flushes or max-background-compactions to -1 while setting another one to other values(not -1). +# They SHOULD both be -1 or both not(if you want to config them manually). # The number of background compacting threads. -# max-background-compactions default value is 2 and the value range is [1, 8]. -max-background-compactions : 2 +# max-background-compactions default value is -1 and the value range is [1, 8] or -1. +# if 'max-background-compactions' is set to -1, the 'max-background-flushes' should also be set to -1, +# which means let rocksdb to auto config them based on the value of 'max-background-jobs' +max-background-compactions : -1 + +# RocksDB delayed-write-rate, default is 0(infer from rate-limiter by RocksDB) +# Ref from rocksdb: Whenever stall conditions are triggered, RocksDB will reduce write rate to delayed_write_rate, +# and could possibly reduce write rate to even lower than delayed_write_rate if estimated pending compaction bytes accumulates. +# If the value is 0, RcoksDB will infer a value from `rater_limiter` value if it is not empty, or 16MB if `rater_limiter` is empty. +# Note that if users change the rate in `rate_limiter` after DB is opened, delayed_write_rate won't be adjusted. +# [Support Dynamically changeable] send 'config set delayed-write-rate' to a running pika can change it's value dynamically +delayed-write-rate : 0 + + +# RocksDB will try to limit number of bytes in one compaction to be lower than this max-compaction-bytes. +# But it's NOT guaranteed. +# default value is -1, means let it be 25 * target-file-size-base (Which is RocksDB's default value) +max-compaction-bytes : -1 -# The number of background threads. -# max-background-jobs default value is 3 and the value range is [2, 12]. -max-background-jobs : 3 # maximum value of RocksDB cached open file descriptors max-cache-files : 5000 @@ -414,14 +459,17 @@ default-slot-num : 1024 # 0: Read 1: Write 2: ReadAndWrite # rate-limiter-mode : default 1 -# rate limiter bandwidth, default 2000MB/s -#rate-limiter-bandwidth : 2097152000 +# rate limiter bandwidth, units in bytes, default 1024GB/s (No limit) +# [Support Dynamically changeable] send 'rate-limiter-bandwidth' to a running pika can change it's value dynamically +#rate-limiter-bandwidth : 1099511627776 #rate-limiter-refill-period-us : 100000 # #rate-limiter-fairness: 10 -# rate limiter auto tune https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html. the default value is false. +# if auto_tuned is true: Enables dynamic adjustment of rate limit within the range +#`[rate-limiter-bandwidth / 20, rate-limiter-bandwidth]`, according to the recent demand for background I/O. +# rate limiter auto tune https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html. the default value is true. #rate-limiter-auto-tuned : true ################################## RocksDB Blob Configure ##################### @@ -466,9 +514,14 @@ default-slot-num : 1024 # The cache will be sharded into 2^blob-num-shard-bits shards. 
# blob-num-shard-bits : -1 -# Rsync Rate limiting configuration 200MB/s +# Rsync Rate limiting configuration [Default value is 200MB/s] +# [USED BY SLAVE] The transmitting speed(Rsync Rate) In full replication is controlled BY SLAVE NODE, You should modify the throttle-bytes-per-second in slave's pika.conf if you wanna change the rsync rate limit. +# [Dynamic Change Supported] send command 'config set throttle-bytes-per-second new_value' to SLAVE NODE can dynamically adjust rsync rate during full sync(use config rewrite can persist the changes). throttle-bytes-per-second : 207200000 - +# Rsync timeout in full sync stage[Default value is 1000 ms], unnecessary retries will happen if this value is too small. +# [Dynamic Change Supported] similar to throttle-bytes-per-second, rsync-timeout-ms can be dynamically changed by configset command +# [USED BY SLAVE] Similar to throttle-bytes-per-second, you should change rsync-timeout-ms's value in slave's conf file if it is needed to adjust. +rsync-timeout-ms : 1000 # The valid range for max-rsync-parallel-num is [1, 4]. # If an invalid value is provided, max-rsync-parallel-num will automatically be reset to 4. max-rsync-parallel-num : 4 @@ -567,4 +620,4 @@ cache-lfu-decay-time: 1 # Warning: Ensure that the Settings of rename-command on the master and slave servers are consistent # # Example: -# rename-command : FLUSHDB 360flushdb \ No newline at end of file +# rename-command : FLUSHDB 360flushdb From 1b3bdd803beca152f27c0e2009f66185a92028cb Mon Sep 17 00:00:00 2001 From: saz97 Date: Mon, 24 Jun 2024 11:35:11 +0800 Subject: [PATCH 9/9] modify default.conf --- tests/assets/default.conf | 85 ++++++++------------------------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 3fcb5d5158..d5d1318f5c 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -27,11 +27,6 @@ thread-num : 1 # are dedicated to handling user requests. thread-pool-size : 12 -# This parameter is used to control whether to separate fast and slow commands. -# When slow-cmd-pool is set to yes, fast and slow commands are separated. -# When set to no, they are not separated. -slow-cmd-pool : no - # Size of the low level thread pool, The threads within this pool # are dedicated to handling slow user requests. slow-cmd-thread-pool-size : 1 @@ -39,17 +34,10 @@ slow-cmd-thread-pool-size : 1 # Slow cmd list e.g. hgetall, mset slow-cmd-list : -# The number of threads to write DB in slaveNode when replicating. -# It's preferable to set slave's sync-thread-num value close to master's thread-pool-size. +# The number of sync-thread for data replication from master, those are the threads work on slave nodes +# and are used to execute commands sent from master node when replicating. sync-thread-num : 6 -# The num of threads to write binlog in slaveNode when replicating, -# each DB cloud only bind to one sync-binlog-thread to write binlog in maximum -#[NOTICE] It's highly recommended to set sync-binlog-thread-num equal to conf item 'database'(then each DB cloud have a exclusive thread to write binlog), -# eg. 
if you use 8 DBs(databases_ is 8), sync-binlog-thread-num is preferable to be 8 -# Valid range of sync-binlog-thread-num is [1, databases], the final value of it is Min(sync-binlog-thread-num, databases) -sync-binlog-thread-num : 1 - # Directory to store log files of Pika, which contains multiple types of logs, # Including: INFO, WARNING, ERROR log, as well as binglog(write2fine) file which # is used for replication. @@ -113,8 +101,6 @@ instance-mode : classic # The default database id is DB 0. You can select a different one on # a per-connection by using SELECT. The db id range is [0, 'databases' value -1]. # The value range of this parameter is [1, 8]. -# [NOTICE] It's RECOMMENDED to set sync-binlog-thread-num equal to DB num(databases), -# if you've changed the value of databases, remember to check if the value of sync-binlog-thread-num is proper. databases : 1 # The number of followers of a master. Only [0, 1, 2, 3, 4] is valid at present. @@ -245,8 +231,7 @@ slave-priority : 100 # The disable_auto_compactions option is [true | false] disable_auto_compactions : false -# Rocksdb max_subcompactions, increasing this value can accelerate the exec speed of a single compaction task -# it's recommended to increase it's value if large compaction is found in you instance +# Rocksdb max_subcompactions max-subcompactions : 1 # The minimum disk usage ratio for checking resume. # If the disk usage ratio is lower than min-check-resume-ratio, it will not check resume, only higher will check resume. @@ -323,11 +308,6 @@ max-write-buffer-num : 2 # whether the key exists. Setting this value too high may hurt performance. min-write-buffer-number-to-merge : 1 -# The total size of wal files, when reaches this limit, rocksdb will force the flush of column-families -# whose memtables are backed by the oldest live WAL file. Also used to control the rocksdb open time when -# process restart. -max-total-wal-size : 1073741824 - # rocksdb level0_stop_writes_trigger level0-stop-writes-trigger : 36 @@ -358,42 +338,17 @@ compression : snappy # https://github.com/facebook/rocksdb/wiki/Compression #compression_per_level : [none:none:snappy:lz4:lz4] -# The number of rocksdb background threads(sum of max-background-compactions and max-background-flushes) -# If max-background-jobs has a valid value AND both 'max-background-flushs' and 'max-background-compactions' is set to -1, -# then max-background-flushs' and 'max-background-compactions will be auto config by rocksdb, specifically: -# 1/4 of max-background-jobs will be given to max-background-flushs' and the rest(3/4) will be given to 'max-background-compactions'. -# 'max-background-jobs' default value is 3 and the value range is [2, 12]. -max-background-jobs : 3 - # The number of background flushing threads. -# max-background-flushes default value is -1 and the value range is [1, 4] or -1. -# if 'max-background-flushes' is set to -1, the 'max-background-compactions' should also be set to -1, -# which means let rocksdb to auto config them based on the value of 'max-background-jobs' -max-background-flushes : -1 - -# [NOTICE] you MUST NOT set one of the max-background-flushes or max-background-compactions to -1 while setting another one to other values(not -1). -# They SHOULD both be -1 or both not(if you want to config them manually). +# max-background-flushes default value is 1 and the value range is [1, 4]. +max-background-flushes : 1 # The number of background compacting threads. -# max-background-compactions default value is -1 and the value range is [1, 8] or -1. 
-# if 'max-background-compactions' is set to -1, the 'max-background-flushes' should also be set to -1, -# which means let rocksdb to auto config them based on the value of 'max-background-jobs' -max-background-compactions : -1 - -# RocksDB delayed-write-rate, default is 0(infer from rate-limiter by RocksDB) -# Ref from rocksdb: Whenever stall conditions are triggered, RocksDB will reduce write rate to delayed_write_rate, -# and could possibly reduce write rate to even lower than delayed_write_rate if estimated pending compaction bytes accumulates. -# If the value is 0, RcoksDB will infer a value from `rater_limiter` value if it is not empty, or 16MB if `rater_limiter` is empty. -# Note that if users change the rate in `rate_limiter` after DB is opened, delayed_write_rate won't be adjusted. -# [Support Dynamically changeable] send 'config set delayed-write-rate' to a running pika can change it's value dynamically -delayed-write-rate : 0 - - -# RocksDB will try to limit number of bytes in one compaction to be lower than this max-compaction-bytes. -# But it's NOT guaranteed. -# default value is -1, means let it be 25 * target-file-size-base (Which is RocksDB's default value) -max-compaction-bytes : -1 +# max-background-compactions default value is 2 and the value range is [1, 8]. +max-background-compactions : 2 +# The number of background threads. +# max-background-jobs default value is 3 and the value range is [2, 12]. +max-background-jobs : 3 # maximum value of RocksDB cached open file descriptors max-cache-files : 5000 @@ -459,17 +414,14 @@ default-slot-num : 1024 # 0: Read 1: Write 2: ReadAndWrite # rate-limiter-mode : default 1 -# rate limiter bandwidth, units in bytes, default 1024GB/s (No limit) -# [Support Dynamically changeable] send 'rate-limiter-bandwidth' to a running pika can change it's value dynamically -#rate-limiter-bandwidth : 1099511627776 +# rate limiter bandwidth, default 2000MB/s +#rate-limiter-bandwidth : 2097152000 #rate-limiter-refill-period-us : 100000 # #rate-limiter-fairness: 10 -# if auto_tuned is true: Enables dynamic adjustment of rate limit within the range -#`[rate-limiter-bandwidth / 20, rate-limiter-bandwidth]`, according to the recent demand for background I/O. -# rate limiter auto tune https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html. the default value is true. +# rate limiter auto tune https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html. the default value is false. #rate-limiter-auto-tuned : true ################################## RocksDB Blob Configure ##################### @@ -514,14 +466,9 @@ default-slot-num : 1024 # The cache will be sharded into 2^blob-num-shard-bits shards. # blob-num-shard-bits : -1 -# Rsync Rate limiting configuration [Default value is 200MB/s] -# [USED BY SLAVE] The transmitting speed(Rsync Rate) In full replication is controlled BY SLAVE NODE, You should modify the throttle-bytes-per-second in slave's pika.conf if you wanna change the rsync rate limit. -# [Dynamic Change Supported] send command 'config set throttle-bytes-per-second new_value' to SLAVE NODE can dynamically adjust rsync rate during full sync(use config rewrite can persist the changes). +# Rsync Rate limiting configuration 200MB/s throttle-bytes-per-second : 207200000 -# Rsync timeout in full sync stage[Default value is 1000 ms], unnecessary retries will happen if this value is too small. 
-# [Dynamic Change Supported] similar to throttle-bytes-per-second, rsync-timeout-ms can be dynamically changed by configset command -# [USED BY SLAVE] Similar to throttle-bytes-per-second, you should change rsync-timeout-ms's value in slave's conf file if it is needed to adjust. -rsync-timeout-ms : 1000 + # The valid range for max-rsync-parallel-num is [1, 4]. # If an invalid value is provided, max-rsync-parallel-num will automatically be reset to 4. max-rsync-parallel-num : 4 @@ -620,4 +567,4 @@ cache-lfu-decay-time: 1 # Warning: Ensure that the Settings of rename-command on the master and slave servers are consistent # # Example: -# rename-command : FLUSHDB 360flushdb +# rename-command : FLUSHDB 360flushdb \ No newline at end of file
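[Editor's note: for reference, the operation behind Storage::PfMerge and multi-key PfCount in this series — first_log.Merge(log) over the stored register strings — is the standard HyperLogLog union: an element-wise maximum of the two register arrays. A self-contained sketch, assuming one register per byte for clarity; Pika's actual register packing may differ.]

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>

// Register-wise merge: the union of two HyperLogLogs keeps, for each
// register, the larger of the two observed leading-zero counts.
std::string MergeRegisters(const std::string& a, const std::string& b) {
  std::string out(std::max(a.size(), b.size()), '\0');
  for (size_t i = 0; i < out.size(); ++i) {
    uint8_t ra = i < a.size() ? static_cast<uint8_t>(a[i]) : 0;
    uint8_t rb = i < b.size() ? static_cast<uint8_t>(b[i]) : 0;
    out[i] = static_cast<char>(std::max(ra, rb));
  }
  return out;
}

// Merging is associative and idempotent, which is why PfMerge can fold any
// number of source keys into the first key's registers one at a time.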