Skip to content

Commit

Permalink
refactor: Enhance Meta-Service Cluster Management (#15601)
Browse files Browse the repository at this point in the history
This commit introduces several optimizations and changes to the
meta-service cluster management:

- **Disallow Removing the Leader Directly:**
  - Removing the leader from the cluster directly is now disallowed,
    because once the leader is removed, the second step of removing a node cannot be completed promptly.
    To properly remove a leader, shut down the leader node first.
    After a new leader is established, use the command
    `databend-meta --leave-id <old-leader-node-id> --leave-via <new-leader-node-raft-address>`
    to remove the old leader from the cluster.

    Removing a leader now results in the error `"can not leave id=<i> via itself"`.

- **Error Collection During Node Removal:**
  - Errors that occur during the process of removing a node are now
    collected and logged, enhancing error management and diagnostics.

- **Adjustment of Time Configurations:**
  - **Heartbeat Interval:** Reduced from 1,000 milliseconds to 500
    milliseconds, improving the responsiveness of the cluster to state
    changes.
  - **Election Timing for Followers:** Adjusted the duration a follower
    waits before starting an election process from a range of `[12,000
    ms, 14,000 ms)` to a shorter range of `[2,500 ms, 3,000 ms)`,
    increasing the cluster's ability to quickly recover from leader
    failures.
  • Loading branch information
drmingdrmer authored May 21, 2024
1 parent e2ab1a2 commit b6bbe58
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/meta/raft-store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ impl RaftConfig {
///
/// Raft will choose a random timeout in this range for next election.
pub fn election_timeout(&self) -> (u64, u64) {
    // Randomized election timeout range: [heartbeat * 2, heartbeat * 3).
    // The diff rendering had kept the pre-commit line (`* 5` / `* 7`) alongside
    // the new one, which is not valid Rust; only the post-commit body belongs here.
    // The shorter range lets the cluster recover from leader failure faster.
    (self.heartbeat_interval * 2, self.heartbeat_interval * 3)
}

pub fn check(&self) -> std::result::Result<(), MetaStartupError> {
Expand Down
6 changes: 5 additions & 1 deletion src/meta/service/src/configs/outer_v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,11 @@ pub struct RaftConfig {

/// The interval in milliseconds at which a leader sends heartbeat messages to followers.
/// Different values of this setting on the leader and followers may cause unexpected behavior.
#[clap(long, default_value = "1000")]
/// This value `t` also affects the election timeout:
/// the election timeout is a random value in `[t*2, t*3)`,
/// i.e., a node starts an election within `[t*2, t*3)` if it receives no RequestVote from a Candidate,
/// and a follower starts an election after `[t*5, t*6)` without a heartbeat from the Leader.
#[clap(long, default_value = "500")]
pub heartbeat_interval: u64,

/// The max time in milliseconds that a leader waits for an install-snapshot ack from a follower or non-voter.
Expand Down
11 changes: 11 additions & 0 deletions src/meta/service/src/meta_service/meta_leader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::collections::BTreeSet;

use anyerror::AnyError;
use databend_common_base::base::tokio::sync::RwLockReadGuard;
use databend_common_meta_client::MetaGrpcReadReq;
use databend_common_meta_kvapi::kvapi::KVApi;
Expand Down Expand Up @@ -219,6 +220,16 @@ impl<'a> MetaLeader<'a> {
pub async fn leave(&self, req: LeaveRequest) -> Result<(), MetaOperationError> {
let node_id = req.node_id;

if node_id == self.sto.id {
return Err(MetaOperationError::DataError(MetaDataError::ReadError(
MetaDataReadError::new(
"leave",
format!("can not leave id={} via itself", node_id),
&AnyError::error("leave-via-self"),
),
)));
}

let can_res = self
.can_leave(node_id)
.await
Expand Down
5 changes: 5 additions & 0 deletions src/meta/service/src/meta_service/meta_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,11 @@ impl MetaNode {
return Ok(true);
} else {
error!("leaving cluster via {} fail: {:?}", addr, reply.error);
errors.push(
AnyError::error(reply.error).add_context(|| {
format!("leave {} via: {}", leave_id, addr.clone())
}),
);
}
}
Err(s) => {
Expand Down

0 comments on commit b6bbe58

Please sign in to comment.