From 6d42c6e24c3bc752ffcaa331df48b5a41eb763b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Thu, 20 Jul 2023 09:49:27 +0800 Subject: [PATCH] Feature: permit follower log to revert to earlier state with `--features loosen-follower-log-revert` Add a new feature flag `loosen-follower-log-revert`, to permit the follower's log to roll back to an earlier state without causing the leader to panic. Although log state reversion is typically seen as a bug, enabling it can be useful for testing or in some special scenarios. For instance, in an even number nodes cluster, erasing a node's data and then rebooting it(log reverts to empty) will not result in data loss. - Related issue: #898 --- .github/workflows/ci.yaml | 3 + openraft/Cargo.toml | 6 ++ openraft/src/docs/faq/faq.md | 50 ++++++++++++++- .../src/docs/feature_flags/feature-flags.md | 21 ++++++- openraft/src/progress/entry/mod.rs | 22 +++++++ tests/Cargo.toml | 1 + tests/tests/README.md | 2 +- tests/tests/replication/main.rs | 2 + .../t60_feature_loosen_follower_log_revert.rs | 63 +++++++++++++++++++ 9 files changed, 163 insertions(+), 7 deletions(-) create mode 100644 tests/tests/replication/t60_feature_loosen_follower_log_revert.rs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d684243a6..7e6dc84ab 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -321,6 +321,9 @@ jobs: - toolchain: "nightly" features: "single-term-leader" + - toolchain: "nightly" + features: "loosen-follower-log-revert" + steps: - name: Setup | Checkout diff --git a/openraft/Cargo.toml b/openraft/Cargo.toml index 60f5d3b18..e91f17b69 100644 --- a/openraft/Cargo.toml +++ b/openraft/Cargo.toml @@ -87,6 +87,12 @@ storage-v2 = [] # Disallows applications to share a raft instance with multiple threads. singlethreaded = ["macros/singlethreaded"] + +# Permit the follower's log to roll back to an earlier state without causing the leader to panic. +# Although log state reversion is typically seen as a bug, enabling it can be useful for testing or other special scenarios. +# For instance, in an even number nodes cluster, erasing a node's data and then rebooting it(log reverts to empty) will not result in data loss. +loosen-follower-log-revert = [] + # default = ["single-term-leader"] [package.metadata.docs.rs] diff --git a/openraft/src/docs/faq/faq.md b/openraft/src/docs/faq/faq.md index 4cb011675..ee62e2297 100644 --- a/openraft/src/docs/faq/faq.md +++ b/openraft/src/docs/faq/faq.md @@ -1,8 +1,52 @@ # FAQ -- Q: 🤔 Why is log id `(term, node_id, log_index)`, while standard Raft uses just - `(term, log_index)`? +- **🤔 Why is log id a tuple of `(term, node_id, log_index)`, while standard Raft uses just + `(term, log_index)`**? - A: The log id `(term, node_id, log_index)` is used to minimize the chance of election conflicts. + 💡 The log id `(term, node_id, log_index)` is used to minimize the chance of election conflicts. This way in every term there could be more than one leaders elected, and the last one is valid. See: [`leader-id`](`crate::docs::data::leader_id`) for details. +

+ + +- **🤔 How to remove node-2 safely from a cluster `{1, 2, 3}`**? + + 💡 Call `Raft::change_membership(btreeset!{1, 3})` to exclude node-2 from + the cluster. Then wipe out node-2 data. + **NEVER** modify/erase the data of any node that is still in a raft cluster, unless you know what you are doing. +

+ + +- **🤔 Can I wipe out the data of **one** node and wait for the leader to replicate all data to it again**? + + 💡 Avoid doing this. Doing so will panic the leader. But it is permitted + if [`loosen-follower-log-revert`] feature flag is enabled. + + In a raft cluster, although logs are replicated to multiple nodes, + wiping out a node and restarting it is still possible to cause data loss. + Assumes the leader is `N1`, followers are `N2, N3, N4, N5`: + - A log(`a`) that is replicated by `N1` to `N2, N3` is considered committed. + - At this point, if `N3` is replaced with an empty node, and at once the leader `N1` is crashed. Then `N5` may elected as a new leader with granted vote by `N3, N4`; + - Then the new leader `N5` will not have log `a`. + + ```text + Ni: Node i + Lj: Leader at term j + Fj: Follower at term j + + N1 | L1 a crashed + N2 | F1 a + N3 | F1 a erased F2 + N4 | F2 + N5 | elect L2 + ----------------------------+---------------> time + Data loss: N5 does not have log `a` + ``` + + But for even number nodes cluster, Erasing **exactly one** node won't cause data loss. + Thus, in a special scenario like this, or for testing purpose, you can use + `--feature loosen-follower-log-revert` to permit erasing a node. +

+ + +[`loosen-follower-log-revert`]: `crate::docs::feature_flags#loosen_follower_log_revert` diff --git a/openraft/src/docs/feature_flags/feature-flags.md b/openraft/src/docs/feature_flags/feature-flags.md index adb608710..4c2f0b08c 100644 --- a/openraft/src/docs/feature_flags/feature-flags.md +++ b/openraft/src/docs/feature_flags/feature-flags.md @@ -2,15 +2,27 @@ By default openraft enables no features. -- `bt`: attaches backtrace to generated errors. +- `bt`: + attaches backtrace to generated errors. +

+ +- `loosen-follower-log-revert`: + Permit the follower's log to roll back to an earlier state without causing the leader to panic. + Although log state reversion is typically seen as a bug, enabling it can be useful for testing or other special scenarios. + For instance, in an even number nodes cluster, erasing a node's data and then rebooting it(log reverts to empty) will not result in data loss. + + **Do not use it unless you know what you are doing**. +

- `serde`: derives `serde::Serialize, serde::Deserialize` for type that are used in storage and network, such as `Vote` or `AppendEntriesRequest`. +

- `single-term-leader`: allows only one leader to be elected in each `term`. This is the standard raft policy, which increases election confliction rate but reduce `LogId`(`(term, node_id, index)` to `(term, index)`) size. Read more about how it is implemented in [`vote`](./vote.md) +

- `compat-07`: provides additional data types to build v0.7 compatible RaftStorage. @@ -18,13 +30,16 @@ By default openraft enables no features. compat-07 = ["compat", "single-term-leader", "serde", "dep:or07", "compat-07-testing"] compat-07-testing = ["dep:tempdir", "anyhow", "dep:serde_json"] ``` +

- `storage-v2`: enables `RaftLogStorage` and `RaftStateMachine` as the v2 storage This is a temporary feature flag, and will be removed in the future, when v2 storage is stable. This feature disables `Adapter`, which is for v1 storage to be used as v2. - V2 storage separates log store and state machine store so that log IO and state machine IO can be parallelized naturally. + V2 storage separates log store and state machine store so that log IO and state machine IO can be parallelized naturally. +

- `singlethreaded`: removes `Send` bounds from `AppData`, `AppDataResponse`, `RaftEntry`, and `SnapshotData` to force the asynchronous runtime to spawn any tasks in the current thread. This is for any single-threaded application that never allows a raft instance to be shared among multiple threads. - In order to use the feature, `AsyncRuntime::spawn` should invoke `tokio::task::spawn_local` or equivalents. \ No newline at end of file + In order to use the feature, `AsyncRuntime::spawn` should invoke `tokio::task::spawn_local` or equivalents. +

diff --git a/openraft/src/progress/entry/mod.rs b/openraft/src/progress/entry/mod.rs index 0dba3c253..14b981185 100644 --- a/openraft/src/progress/entry/mod.rs +++ b/openraft/src/progress/entry/mod.rs @@ -5,6 +5,7 @@ use std::fmt::Display; use std::fmt::Formatter; use std::ops::Deref; +use crate::display_ext::DisplayOptionExt; use crate::less; use crate::less_equal; use crate::progress::inflight::Inflight; @@ -124,6 +125,27 @@ impl ProgressEntry { debug_assert!(conflict < self.searching_end); self.searching_end = conflict; + // An already matching log id is found lost: + // + // - If log reversion is allowed, just restart the binary search from the beginning. + // - Otherwise, panic it. + // + // Refer to: `docs::feature_flags#loosen_follower_log_revert` + { + #[cfg(feature = "loosen-follower-log-revert")] + if conflict < self.matching.next_index() { + self.matching = None; + } + + debug_assert!( + conflict >= self.matching.next_index(), + "follower log reversion is not allowed \ + without `--features loosen-follower-log-revert`; \ + matching: {}; conflict: {}", + self.matching.display(), + conflict + ); + } Ok(()) } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index f053bd110..1c421fb81 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -39,3 +39,4 @@ tracing-subscriber = { workspace = true } bt = ["openraft/bt"] single-term-leader = ["openraft/single-term-leader"] +loosen-follower-log-revert = ["openraft/loosen-follower-log-revert"] diff --git a/tests/tests/README.md b/tests/tests/README.md index af6434c04..a908d311c 100644 --- a/tests/tests/README.md +++ b/tests/tests/README.md @@ -11,7 +11,7 @@ integration tests to a separate crate. A test file name starts with `t[\d\d]_`, where `\d\d` is the test case number indicating priority. -- `t00`: no used. +- `t00`: not used. - `t10`: basic behaviors. - `t20`: life cycle test cases. - `t30`: special cases for an API. diff --git a/tests/tests/replication/main.rs b/tests/tests/replication/main.rs index bfc7fd20e..72c1c97b5 100644 --- a/tests/tests/replication/main.rs +++ b/tests/tests/replication/main.rs @@ -8,3 +8,5 @@ mod fixtures; mod t10_append_entries_partial_success; mod t50_append_entries_backoff; mod t50_append_entries_backoff_rejoin; +#[cfg(feature = "loosen-follower-log-revert")] +mod t60_feature_loosen_follower_log_revert; diff --git a/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs b/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs new file mode 100644 index 000000000..e5d7e01ff --- /dev/null +++ b/tests/tests/replication/t60_feature_loosen_follower_log_revert.rs @@ -0,0 +1,63 @@ +use std::sync::Arc; +use std::time::Duration; + +use anyhow::Result; +use maplit::btreeset; +use openraft::Config; +use openraft_memstore::MemStore; + +use crate::fixtures::init_default_ut_tracing; +use crate::fixtures::RaftRouter; + +/// With "--features loosen-follower-log-revert", the leader allows follower to revert its log to an +/// earlier state. +#[async_entry::test(worker_threads = 4, init = "init_default_ut_tracing()", tracing_span = "debug")] +async fn feature_loosen_follower_log_revert() -> Result<()> { + let config = Arc::new( + Config { + enable_tick: false, + enable_heartbeat: false, + ..Default::default() + } + .validate()?, + ); + + let mut router = RaftRouter::new(config.clone()); + + tracing::info!("--- initializing cluster"); + let mut log_index = router.new_cluster(btreeset! {0,1,2}, btreeset! {3}).await?; + + tracing::info!(log_index, "--- write 10 logs"); + { + log_index += router.client_request_many(0, "0", 10).await?; + for i in [0, 1, 2, 3] { + router.wait(&i, timeout()).log(Some(log_index), format!("{} writes", 10)).await?; + } + } + + tracing::info!(log_index, "--- erase node 3 and restart"); + { + let (_raft, ls, sm) = router.remove_node(3).unwrap(); + { + let mut sto = ls.storage_mut().await; + *sto = Arc::new(MemStore::new()); + } + router.new_raft_node_with_sto(3, ls, sm).await; + router.add_learner(0, 3).await?; + log_index += 1; // add learner + } + + tracing::info!(log_index, "--- write another 10 logs, leader should not panic"); + { + log_index += router.client_request_many(0, "0", 10).await?; + for i in [0, 1, 2, 3] { + router.wait(&i, timeout()).log(Some(log_index), format!("{} writes", 10)).await?; + } + } + + Ok(()) +} + +fn timeout() -> Option { + Some(Duration::from_millis(1_000)) +}